Skip to main content

container/
cmaf.rs

1//! Fragmented MP4 / CMAF box writers.
2//!
3//! Produces ISO/IEC 14496-12 §8.8 movie-fragment boxes (`moof` / `mfhd` /
4//! `traf` / `tfhd` / `tfdt` / `trun`) and the corresponding `mvex` /
5//! `mehd` / `trex` declarations that go inside a CMAF init segment's
6//! `moov`. CMAF (ISO/IEC 23000-19) constrains the general 14496-12 model:
7//! exactly one track per fragment (one `traf` per `moof`), exactly one
8//! track per init segment, and a small set of mandatory boxes.
9//!
10//! This module is the box-level primitive layer. Higher-level callers
11//! (`init_segment_video`, `media_segment_video`, etc. in subsequent
12//! commits) compose these into init + media segments. The split lets us
13//! unit-test each box's byte layout against the spec without having to
14//! drive a full encode + segment pipeline.
15//!
16//! Spec citations are given by section number in the relevant box's doc
17//! comment so future readers can cross-check against the standard.
18//!
19//! # CMAF brand
20//!
//! Init segments for video tracks declare the `cmfc` brand (CMAF
//! constraints, per CMAF §7.3.4). Audio tracks use `cmfa` instead. Each
//! brand is listed in `compatible_brands` alongside the generic `iso6` /
//! `iso2` / `mp42` brands (plus `av01` for video) so non-CMAF-aware tools
//! that consume the same boxes (e.g. an old ffprobe) can still demux them.
26//!
27//! # Sample-flags packing
28//!
29//! `default_sample_flags` (in `trex` / `tfhd`) and `first_sample_flags`
30//! / per-sample flags (in `trun`) are packed per ISO/IEC 14496-12
31//! §8.8.3.1. The 32 bits are laid out:
32//!
33//! ```text
//!   reserved[4]      = 0
35//!   is_leading[2]    = 0
36//!   sample_depends_on[2]
37//!   sample_is_depended_on[2]
38//!   sample_has_redundancy[2]
39//!   sample_padding_value[3] = 0
40//!   sample_is_non_sync_sample[1]
41//!   sample_degradation_priority[16] = 0
42//! ```
43//!
44//! For AV1 / AAC the meaningful values are `sample_depends_on = 1`
45//! (this sample depends on others — i.e. P / B / non-IDR) or `2`
46//! (independent — i.e. IDR / sync), and `sample_is_non_sync_sample = 1`
47//! for non-key frames, `0` for keyframes. The helper
48//! [`SampleFlags::pack`] handles this; callers shouldn't compose the
49//! u32 by hand.
50
51use anyhow::{Context, Result};
52use codec::frame::ColorMetadata;
53use std::fs::{self, File};
54use std::io::{BufWriter, Write};
55use std::path::{Path, PathBuf};
56
57use crate::AudioInfo;
58use crate::mux::{BoxBuilder, build_audio_stsd, build_av01, write_unity_matrix};
59
/// Four-character brand codes that the CMAF init segments list in
/// `ftyp.compatible_brands`.
pub mod brand {
    /// Brand written by the video `ftyp` (cited in this file as CMAF §7.3.4).
    pub const CMFC: &[u8; 4] = b"cmfc";

    /// Brand written by the audio `ftyp` (cited in this file as CMAF §7.3.5).
    pub const CMFA: &[u8; 4] = b"cmfa";
}
67
/// Which kind of media a CMAF track carries.
///
/// CMAF puts a single track in each init segment / fragment, and the
/// higher-level orchestration uses this value to choose a codec dispatch
/// path. The init / segment writers expose type-specific entry points
/// instead of taking this enum, so nothing references it yet; the
/// `#[allow(dead_code)]` stays until the pipeline orchestrator (Phase 4)
/// starts passing it around.
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CmafTrackKind {
    /// The track carries video.
    Video,
    /// The track carries audio.
    Audio,
}
80
/// Sample flags as written into `default_sample_flags` (`trex` / `tfhd`),
/// `first_sample_flags` and per-sample `sample_flags` (`trun`).
/// ISO/IEC 14496-12 §8.8.3.1.
///
/// Only the sync / non-sync distinction is modelled; every other field of
/// the 32-bit word is left at zero by [`SampleFlags::pack`].
///
/// The [`Default`] value is a non-sync (delta) sample, identical to
/// [`SampleFlags::delta_frame`]. Use [`SampleFlags::keyframe`] for sync
/// samples.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct SampleFlags {
    /// `true` for a sync sample (keyframe), `false` for a non-sync sample.
    ///
    /// This is the INVERSE of the wire-level `sample_is_non_sync_sample`
    /// bit: `is_sync == true` packs that bit as 0, `is_sync == false`
    /// packs it as 1.
    pub is_sync: bool,
}

impl SampleFlags {
    /// Bit position of the 2-bit `sample_depends_on` field (bits 24-25).
    const DEPENDS_ON_SHIFT: u32 = 24;
    /// `sample_depends_on = 1`: the sample depends on other samples.
    const DEPENDS_ON_OTHERS: u32 = 1 << Self::DEPENDS_ON_SHIFT;
    /// `sample_depends_on = 2`: the sample depends on no other sample.
    const DEPENDS_ON_NONE: u32 = 2 << Self::DEPENDS_ON_SHIFT;
    /// `sample_is_non_sync_sample` (bit 16).
    const NON_SYNC_BIT: u32 = 1 << 16;

    /// Pack into the wire-format `u32`.
    ///
    /// * sync sample: `sample_depends_on = 2`, `sample_is_non_sync_sample = 0`
    ///   (`0x0200_0000`).
    /// * non-sync sample: `sample_depends_on = 1`,
    ///   `sample_is_non_sync_sample = 1` (`0x0101_0000`).
    #[must_use]
    pub const fn pack(self) -> u32 {
        if self.is_sync {
            Self::DEPENDS_ON_NONE
        } else {
            Self::DEPENDS_ON_OTHERS | Self::NON_SYNC_BIT
        }
    }

    /// Flags for a sync sample (keyframe).
    #[must_use]
    pub const fn keyframe() -> Self {
        Self { is_sync: true }
    }

    /// Flags for a non-sync sample (one that depends on earlier samples).
    #[must_use]
    pub const fn delta_frame() -> Self {
        Self { is_sync: false }
    }
}
117
118/// Per-sample fields written into `trun`. Each entry produces one row
119/// of (duration, size, flags) in the fragment's sample table.
120#[derive(Debug, Clone, Copy)]
121pub struct CmafSample {
122    /// Sample duration in track timescale ticks.
123    pub duration: u32,
124    /// Encoded sample size in bytes.
125    pub size: u32,
126    /// Sample flags (sync / non-sync). The very FIRST sample in a fragment
127    /// uses `first_sample_flags` instead — see `build_trun_video`.
128    pub flags: SampleFlags,
129}
130
131// =====================================================================
132// Box writers
133// =====================================================================
134
135/// `mfhd` — Movie Fragment Header (14496-12 §8.8.5).
136///
137/// Carries the per-fragment sequence number. CMAF requires
138/// `sequence_number` to be monotonic and start at 1 for the first
139/// fragment of each track.
140///
141/// Wire layout (16 bytes total):
142/// ```text
143///   size:u32          = 16
144///   type:'mfhd'
145///   version:u8        = 0
146///   flags:u24         = 0
147///   sequence_number:u32
148/// ```
149pub fn build_mfhd(sequence_number: u32) -> Vec<u8> {
150    let mut b = BoxBuilder::new(b"mfhd");
151    b.u8(0); // version
152    b.extend(&[0, 0, 0]); // flags
153    b.u32(sequence_number);
154    b.finish()
155}
156
157/// `tfhd` — Track Fragment Header (14496-12 §8.8.7).
158///
159/// We always set the `default-base-is-moof` flag (`0x020000`) — required
160/// by CMAF §7.3.2.1. With this flag, sample data offsets in `trun`
161/// become relative to the start of the enclosing `moof`, which is
162/// exactly what HLS-CMAF expects. We avoid emitting `base_data_offset`
163/// (an absolute file offset that breaks segment portability).
164///
165/// Optional fields are emitted based on the bitwise combination of
166/// `tf_flags`:
167///   0x000001 base_data_offset            (NOT emitted; we use default-base-is-moof)
168///   0x000002 sample_description_index    (only if non-default needed)
169///   0x000008 default_sample_duration     (emitted when `default_duration.is_some()`)
170///   0x000010 default_sample_size         (emitted when `default_size.is_some()`)
171///   0x000020 default_sample_flags        (emitted when `default_flags.is_some()`)
172///   0x010000 duration-is-empty
173///   0x020000 default-base-is-moof        (always emitted)
174pub fn build_tfhd(
175    track_id: u32,
176    default_duration: Option<u32>,
177    default_size: Option<u32>,
178    default_flags: Option<u32>,
179) -> Vec<u8> {
180    let mut tf_flags: u32 = 0x020000; // default-base-is-moof
181    if default_duration.is_some() {
182        tf_flags |= 0x000008;
183    }
184    if default_size.is_some() {
185        tf_flags |= 0x000010;
186    }
187    if default_flags.is_some() {
188        tf_flags |= 0x000020;
189    }
190
191    let mut b = BoxBuilder::new(b"tfhd");
192    b.u8(0); // version
193    let flag_bytes = tf_flags.to_be_bytes();
194    b.extend(&flag_bytes[1..]); // 24-bit flags (drop high byte)
195    b.u32(track_id);
196    if let Some(d) = default_duration {
197        b.u32(d);
198    }
199    if let Some(s) = default_size {
200        b.u32(s);
201    }
202    if let Some(f) = default_flags {
203        b.u32(f);
204    }
205    b.finish()
206}
207
208/// `tfdt` — Track Fragment Decode Time (14496-12 §8.8.12).
209///
210/// Carries the absolute decode time of the first sample in this
211/// fragment, in track timescale ticks, accumulated from the start of
212/// the track (NOT from the start of the fragment). Required by CMAF
213/// §7.3.2.1.
214///
215/// We always emit version 1 (u64 decode time). Version 0's u32 wraps
216/// at ~24h for a 48 kHz audio track; version 1 covers >12 million
217/// years at the same rate. The 4 extra bytes are immaterial.
218///
219/// Wire layout (20 bytes total):
220/// ```text
221///   size:u32          = 20
222///   type:'tfdt'
223///   version:u8        = 1
224///   flags:u24         = 0
225///   base_media_decode_time:u64
226/// ```
227pub fn build_tfdt(base_media_decode_time: u64) -> Vec<u8> {
228    let mut b = BoxBuilder::new(b"tfdt");
229    b.u8(1); // version 1
230    b.extend(&[0, 0, 0]); // flags
231    b.u64(base_media_decode_time);
232    b.finish()
233}
234
235/// `trun` — Track Run (14496-12 §8.8.8) for a video fragment.
236///
237/// Encodes the per-sample table for the fragment's run of samples.
238/// CMAF allows multiple `trun`s per `traf` but we always emit exactly
239/// one (cleaner manifest, no functional difference).
240///
241/// Flag bits we always set:
242///   0x000001 data-offset-present       (offset from moof start to mdat data)
243///   0x000004 first-sample-flags-present (override of default for sample 0)
244///   0x000100 sample-duration-present
245///   0x000200 sample-size-present
246///
247/// We don't emit per-sample-flags (0x000400) because all non-first
248/// samples in a video fragment share the default (P-frame), and we
249/// don't emit sample-composition-time-offsets (0x000800) because
250/// AV1 has no B-frame reordering in our pipeline (PTS == DTS).
251///
252/// `data_offset` is the byte offset from the START of the enclosing
253/// `moof` to the first byte of the fragment's `mdat` payload. It
254/// CANNOT be filled in until the full `moof` size is known, so this
255/// builder leaves it as 0 and returns the byte position to be patched.
256/// See [`MoofData::patch_data_offset`].
257fn build_trun_video(samples: &[CmafSample]) -> (Vec<u8>, usize) {
258    let mut b = BoxBuilder::new(b"trun");
259    b.u8(0); // version
260    // Flags: data-offset (1) | first-sample-flags (4) | duration (0x100) | size (0x200)
261    let flags: u32 = 0x000001 | 0x000004 | 0x000100 | 0x000200;
262    let flag_bytes = flags.to_be_bytes();
263    b.extend(&flag_bytes[1..]);
264    b.u32(samples.len() as u32);
265    // data_offset placeholder — final value patched in once moof size is
266    // known. We track its absolute position WITHIN this trun box (header
267    // 8 + version 1 + flags 3 + sample_count 4 = 16) so the caller can
268    // translate to a position-within-moof later.
269    let data_offset_pos_within_trun = b.current_len();
270    b.u32(0); // placeholder
271
272    // first_sample_flags: the spec's standard pattern is to mark sample
273    // 0 explicitly (almost always a sync sample for the first fragment;
274    // for subsequent fragments the first sample is whatever the GOP
275    // boundary produced — typically also sync since CMAF segments must
276    // start with a sync sample per §7.3.2.1).
277    if let Some(first) = samples.first() {
278        b.u32(first.flags.pack());
279    } else {
280        b.u32(0);
281    }
282
283    for s in samples {
284        b.u32(s.duration);
285        b.u32(s.size);
286    }
287
288    let bytes = b.finish();
289    (bytes, data_offset_pos_within_trun)
290}
291
292/// `trun` for an audio fragment. Same shape as video but no sync-flags
293/// distinction (every audio sample is independently decodable in
294/// AAC-LC / Opus / AC-3 / E-AC-3), so we don't emit first-sample-flags
295/// — the default in `trex` / `tfhd` covers them all.
296fn build_trun_audio(samples: &[CmafSample]) -> (Vec<u8>, usize) {
297    let mut b = BoxBuilder::new(b"trun");
298    b.u8(0); // version
299    // Flags: data-offset (1) | duration (0x100) | size (0x200)
300    let flags: u32 = 0x000001 | 0x000100 | 0x000200;
301    let flag_bytes = flags.to_be_bytes();
302    b.extend(&flag_bytes[1..]);
303    b.u32(samples.len() as u32);
304    let data_offset_pos_within_trun = b.current_len();
305    b.u32(0); // placeholder
306
307    for s in samples {
308        b.u32(s.duration);
309        b.u32(s.size);
310    }
311
312    let bytes = b.finish();
313    (bytes, data_offset_pos_within_trun)
314}
315
316/// `mehd` — Movie Extends Header (14496-12 §8.8.2).
317///
318/// Carries the total fragment duration of the longest track, in
319/// movie timescale ticks. CMAF treats this as informational; players
320/// derive actual duration from the sum of per-fragment `trun` rows.
321/// We emit it for spec completeness.
322///
323/// Version 1 (u64 fragment_duration) — same rationale as `tfdt`.
324///
325/// Wire layout (20 bytes total):
326/// ```text
327///   size:u32          = 20
328///   type:'mehd'
329///   version:u8        = 1
330///   flags:u24         = 0
331///   fragment_duration:u64
332/// ```
333pub fn build_mehd(fragment_duration: u64) -> Vec<u8> {
334    let mut b = BoxBuilder::new(b"mehd");
335    b.u8(1); // version 1
336    b.extend(&[0, 0, 0]); // flags
337    b.u64(fragment_duration);
338    b.finish()
339}
340
341/// `trex` — Track Extends (14496-12 §8.8.3).
342///
343/// Per-track defaults that apply to every `trun` in every `moof`
344/// unless overridden via `tfhd`'s default-* fields or per-sample
345/// values in `trun`. The point of `trex` is to keep `moof` boxes
346/// small: if every sample has the same duration / size / flags, the
347/// `trun` can omit them and just inherit from `trex`.
348///
349/// In practice we override `default_sample_duration` / `_size` per
350/// fragment (durations vary slightly with rounding; sizes vary per
351/// sample) so most of these fields just hold spec-zero values. We do
352/// set `default_sample_description_index = 1` since every sample in
353/// our pipeline references the single `stsd` entry built in the
354/// init segment.
355///
356/// Wire layout (32 bytes total):
357/// ```text
358///   size:u32          = 32
359///   type:'trex'
360///   version:u8        = 0
361///   flags:u24         = 0
362///   track_id:u32
363///   default_sample_description_index:u32 = 1
364///   default_sample_duration:u32          = 0
365///   default_sample_size:u32              = 0
366///   default_sample_flags:u32             = 0 (or non-sync default)
367/// ```
368pub fn build_trex(track_id: u32, default_sample_flags: u32) -> Vec<u8> {
369    let mut b = BoxBuilder::new(b"trex");
370    b.u8(0); // version
371    b.extend(&[0, 0, 0]); // flags
372    b.u32(track_id);
373    b.u32(1); // default_sample_description_index
374    b.u32(0); // default_sample_duration (overridden per-fragment)
375    b.u32(0); // default_sample_size (overridden per-sample)
376    b.u32(default_sample_flags);
377    b.finish()
378}
379
380/// `mvex` — Movie Extends container (14496-12 §8.8.1).
381///
382/// Goes inside `moov`. Wraps a single `mehd` plus one `trex` per
383/// track. Presence of `mvex` is what tells a parser this MP4 is
384/// fragmented (i.e. there will be `moof`s following).
385pub fn build_mvex(mehd: &[u8], trexes: &[Vec<u8>]) -> Vec<u8> {
386    let mut b = BoxBuilder::new(b"mvex");
387    b.extend(mehd);
388    for trex in trexes {
389        b.extend(trex);
390    }
391    b.finish()
392}
393
394/// `traf` — Track Fragment (14496-12 §8.8.6).
395///
396/// Wraps `tfhd` + `tfdt` + `trun` for one track inside one `moof`.
397/// CMAF mandates exactly one `traf` per `moof` (§7.3.2.1: "Each CMAF
398/// Fragment SHALL contain exactly one Track Fragment Box.").
399fn build_traf(tfhd: &[u8], tfdt: &[u8], trun: &[u8]) -> Vec<u8> {
400    let mut b = BoxBuilder::new(b"traf");
401    b.extend(tfhd);
402    b.extend(tfdt);
403    b.extend(trun);
404    b.finish()
405}
406
/// A serialized `moof` box together with the location of its
/// `trun.data_offset` field.
///
/// Returned by [`build_moof_video`] and [`build_moof_audio`]. Those
/// builders leave `data_offset` as a zero placeholder, so one of the
/// patch methods must be called before the bytes are written out:
///
/// * [`Self::patch_default_no_gap`] for the standard CMAF layout where
///   the `mdat` box immediately follows the `moof` (offset =
///   `bytes.len() + 8`, i.e. the full moof plus the 8-byte mdat header);
/// * [`Self::patch_data_offset`] when other bytes sit between the two.
pub struct MoofData {
    /// The complete `moof` box.
    pub bytes: Vec<u8>,
    /// Byte position WITHIN `bytes` of the 4-byte big-endian
    /// `data_offset` field inside `trun`. Use [`Self::patch_data_offset`]
    /// to overwrite it.
    pub data_offset_pos: usize,
}

impl MoofData {
    /// Size in bytes of the `mdat` box header (size:u32 + type) that
    /// precedes the payload in the no-gap layout.
    const MDAT_HEADER_LEN: usize = 8;

    /// Overwrite the `trun.data_offset` field in place.
    ///
    /// `data_offset` is the distance from the START of the moof to the
    /// START of the mdat payload (moof size + 8 for a no-gap layout).
    ///
    /// # Panics
    ///
    /// Panics if `data_offset_pos` does not leave room for a 4-byte field
    /// inside `bytes`.
    pub fn patch_data_offset(&mut self, data_offset: u32) {
        // Checked slicing avoids `pos + 4` overflow and turns an opaque
        // index panic into one that names the broken invariant.
        let field = self
            .bytes
            .get_mut(self.data_offset_pos..)
            .and_then(|tail| tail.get_mut(..4))
            .expect("data_offset_pos must address a 4-byte field inside the moof bytes");
        field.copy_from_slice(&data_offset.to_be_bytes());
    }

    /// Patch with the default no-gap offset: `bytes.len()` plus the
    /// 8-byte mdat header. Use this when moof and mdat are written
    /// back-to-back.
    ///
    /// # Panics
    ///
    /// Panics if the offset does not fit the 32-bit field (previously the
    /// value was silently truncated), or under the same condition as
    /// [`Self::patch_data_offset`].
    pub fn patch_default_no_gap(&mut self) {
        let offset = self
            .bytes
            .len()
            .checked_add(Self::MDAT_HEADER_LEN)
            .filter(|total| *total as u64 <= u64::from(u32::MAX))
            .expect("moof size + mdat header must fit the 32-bit trun.data_offset field");
        // Lossless: the range was checked just above.
        self.patch_data_offset(offset as u32);
    }
}
443
444/// Build a video `moof` for one CMAF fragment.
445///
446/// Composes `mfhd` + `traf{tfhd, tfdt, trun}` and tracks the byte
447/// position of `trun.data_offset` so the caller can patch it once
448/// the moof's final size is known (or accept the default no-gap
449/// layout via [`MoofData::patch_default_no_gap`]).
450pub fn build_moof_video(
451    sequence_number: u32,
452    track_id: u32,
453    base_media_decode_time: u64,
454    samples: &[CmafSample],
455) -> MoofData {
456    let mfhd = build_mfhd(sequence_number);
457    // Default duration/size omitted — they'll vary per-sample, so
458    // emitting them as defaults would be wrong. Default flags set to
459    // delta-frame so per-sample flags are needed only on the first
460    // (sync) sample, which we override via first_sample_flags in trun.
461    let tfhd = build_tfhd(
462        track_id,
463        None,
464        None,
465        Some(SampleFlags::delta_frame().pack()),
466    );
467    let tfdt = build_tfdt(base_media_decode_time);
468    let (trun, data_offset_pos_within_trun) = build_trun_video(samples);
469
470    // Compute where `data_offset` lives within the eventual moof.
471    // moof_header(8) + mfhd(16) + traf_header(8) + tfhd_len + tfdt(20) +
472    //   data_offset_pos_within_trun.
473    let moof_header = 8usize;
474    let traf_header = 8usize;
475    let pos_in_moof = moof_header
476        + mfhd.len()
477        + traf_header
478        + tfhd.len()
479        + tfdt.len()
480        + data_offset_pos_within_trun;
481
482    let traf = build_traf(&tfhd, &tfdt, &trun);
483    let mut b = BoxBuilder::new(b"moof");
484    b.extend(&mfhd);
485    b.extend(&traf);
486    let bytes = b.finish();
487
488    MoofData {
489        bytes,
490        data_offset_pos: pos_in_moof,
491    }
492}
493
494/// Build an audio `moof`. Same composition as video but without
495/// first-sample-flags differentiation in `trun` (every audio sample
496/// is independently decodable).
497pub fn build_moof_audio(
498    sequence_number: u32,
499    track_id: u32,
500    base_media_decode_time: u64,
501    samples: &[CmafSample],
502) -> MoofData {
503    let mfhd = build_mfhd(sequence_number);
504    // Audio default-flags: every sample is independently decodable,
505    // so default to sync.
506    let tfhd = build_tfhd(track_id, None, None, Some(SampleFlags::keyframe().pack()));
507    let tfdt = build_tfdt(base_media_decode_time);
508    let (trun, data_offset_pos_within_trun) = build_trun_audio(samples);
509
510    let moof_header = 8usize;
511    let traf_header = 8usize;
512    let pos_in_moof = moof_header
513        + mfhd.len()
514        + traf_header
515        + tfhd.len()
516        + tfdt.len()
517        + data_offset_pos_within_trun;
518
519    let traf = build_traf(&tfhd, &tfdt, &trun);
520    let mut b = BoxBuilder::new(b"moof");
521    b.extend(&mfhd);
522    b.extend(&traf);
523    let bytes = b.finish();
524
525    MoofData {
526        bytes,
527        data_offset_pos: pos_in_moof,
528    }
529}
530
531// =====================================================================
532// Init segment writers (Phase 1.2)
533// =====================================================================
534//
535// CMAF init segments carry `ftyp + moov` only — no sample data. The
536// `moov.trak.mdia.minf.stbl` has a populated `stsd` (the sample
537// description) but EMPTY `stts/stsc/stsz/stco`. That's how the parser
538// knows samples will arrive in subsequent `moof` boxes via the
539// `mvex/trex` defaults set in this same `moov`.
540//
541// The track is one-per-init per CMAF §7.3.2.1 (each video init carries
542// only the video track, each audio init only the audio track).
543// `track_id = 1` in both cases since each init's `moov` is independent.
544
545/// Build a CMAF video init segment for an AV1 track.
546///
547/// `config_obus` is the LOB-formatted OBU sequence header (with
548/// `obu_has_size_field=1`) — call [`crate::mux::extract_sequence_header`]
549/// against the first encoded packet to get this. `timescale` is the
550/// track's mdhd/mvhd timescale in ticks per second; we recommend
551/// `frame_rate × 1000` rounded to a clean number (e.g. 30000 for 30fps,
552/// 24000 for 24fps) so per-frame durations divide evenly. The fragment
553/// duration in `mehd` is left at 0 (informational; players derive
554/// actual duration from `trun`).
555pub fn build_init_segment_video(
556    width: u32,
557    height: u32,
558    timescale: u32,
559    config_obus: &[u8],
560    color_metadata: &ColorMetadata,
561) -> Vec<u8> {
562    let track_id = 1u32;
563
564    // ftyp — major_brand=iso6, brands include cmfc + av01
565    let ftyp = build_ftyp_video();
566
567    // moov children
568    let mvhd = build_mvhd(timescale, /* duration */ 0, /* next_track_id */ 2);
569    let trak = build_video_trak(
570        width,
571        height,
572        timescale,
573        track_id,
574        config_obus,
575        color_metadata,
576    );
577    let mvex_blob = {
578        let mehd = build_mehd(0);
579        // For video, default sample flags are delta-frame (most samples
580        // in a fragment are P-frames); the IDR opening each fragment
581        // overrides via trun's first_sample_flags. This matches what the
582        // moof writer sets in tfhd.
583        let trex = build_trex(track_id, SampleFlags::delta_frame().pack());
584        build_mvex(&mehd, &[trex])
585    };
586
587    let mut moov = BoxBuilder::new(b"moov");
588    moov.extend(&mvhd);
589    moov.extend(&trak);
590    moov.extend(&mvex_blob);
591    let moov = moov.finish();
592
593    let mut out = Vec::with_capacity(ftyp.len() + moov.len());
594    out.extend_from_slice(&ftyp);
595    out.extend_from_slice(&moov);
596    out
597}
598
599/// Build a CMAF audio init segment.
600///
601/// `audio_info` carries codec / sample_rate / channels / asc_bytes (or
602/// codec_private for Opus / AC-3 / E-AC-3). Same struct the existing
603/// non-fragmented muxer's `with_audio` accepts — see crate::AudioInfo.
604pub fn build_init_segment_audio(audio_info: &AudioInfo) -> Vec<u8> {
605    let track_id = 1u32;
606
607    let ftyp = build_ftyp_audio();
608
609    let mvhd = build_mvhd(
610        audio_info.timescale,
611        /* duration */ 0,
612        /* next_track_id */ 2,
613    );
614    let trak = build_audio_trak(audio_info, track_id);
615    let mvex_blob = {
616        let mehd = build_mehd(0);
617        // Every audio sample is independently decodable — sync default.
618        let trex = build_trex(track_id, SampleFlags::keyframe().pack());
619        build_mvex(&mehd, &[trex])
620    };
621
622    let mut moov = BoxBuilder::new(b"moov");
623    moov.extend(&mvhd);
624    moov.extend(&trak);
625    moov.extend(&mvex_blob);
626    let moov = moov.finish();
627
628    let mut out = Vec::with_capacity(ftyp.len() + moov.len());
629    out.extend_from_slice(&ftyp);
630    out.extend_from_slice(&moov);
631    out
632}
633
634/// `ftyp` for a video init segment. Brands declare `cmfc` (CMAF video
635/// constraints), `av01` (AV1-in-MP4), plus `iso6` / `mp42` / `iso2` for
636/// broad parser compatibility. Major brand is `iso6` (CMAF / 14496-12
637/// edition 6) — Apple's player and ffmpeg both honour it.
638fn build_ftyp_video() -> Vec<u8> {
639    let mut b = BoxBuilder::new(b"ftyp");
640    b.extend(b"iso6"); // major_brand
641    b.u32(0); // minor_version
642    b.extend(b"iso6");
643    b.extend(b"iso2");
644    b.extend(b"mp42");
645    b.extend(brand::CMFC);
646    b.extend(b"av01");
647    b.finish()
648}
649
650/// `ftyp` for an audio init segment. Same as video but `cmfa` brand
651/// instead of `cmfc`, and no `av01` (irrelevant for an audio-only
652/// segment).
653fn build_ftyp_audio() -> Vec<u8> {
654    let mut b = BoxBuilder::new(b"ftyp");
655    b.extend(b"iso6"); // major_brand
656    b.u32(0); // minor_version
657    b.extend(b"iso6");
658    b.extend(b"iso2");
659    b.extend(b"mp42");
660    b.extend(brand::CMFA);
661    b.finish()
662}
663
664/// `mvhd` (14496-12 §8.2.2) — movie header. Same layout as the existing
665/// non-fragmented muxer; reimplemented here because we need a slightly
666/// different `next_track_id` (single-track init segments).
667fn build_mvhd(timescale: u32, duration: u64, next_track_id: u32) -> Vec<u8> {
668    let mut b = BoxBuilder::new(b"mvhd");
669    b.u8(0);
670    b.extend(&[0, 0, 0]);
671    b.u32(0); // creation_time
672    b.u32(0); // modification_time
673    b.u32(timescale);
674    b.u32(duration as u32);
675    b.u32(0x00010000); // rate 1.0
676    b.u16(0x0100); // volume 1.0
677    b.u16(0); // reserved
678    b.u32(0);
679    b.u32(0);
680    write_unity_matrix(&mut b);
681    for _ in 0..6 {
682        b.u32(0);
683    } // pre_defined
684    b.u32(next_track_id);
685    b.finish()
686}
687
688fn build_video_trak(
689    width: u32,
690    height: u32,
691    timescale: u32,
692    track_id: u32,
693    config_obus: &[u8],
694    color_metadata: &ColorMetadata,
695) -> Vec<u8> {
696    let tkhd = build_video_tkhd(width, height, track_id);
697    let mdia = build_video_mdia(width, height, timescale, config_obus, color_metadata);
698    let mut b = BoxBuilder::new(b"trak");
699    b.extend(&tkhd);
700    b.extend(&mdia);
701    b.finish()
702}
703
704fn build_video_tkhd(width: u32, height: u32, track_id: u32) -> Vec<u8> {
705    let mut b = BoxBuilder::new(b"tkhd");
706    b.u8(0);
707    // flags = 0x000003 (track_enabled | track_in_movie). We don't set
708    // 0x000004 (track_in_preview) — that's a QuickTime-flavored bit and
709    // streaming players ignore it.
710    b.extend(&[0, 0, 0x03]);
711    b.u32(0); // creation_time
712    b.u32(0); // modification_time
713    b.u32(track_id);
714    b.u32(0); // reserved
715    b.u32(0); // duration (movie timescale; fragment muxer leaves this 0)
716    b.u32(0);
717    b.u32(0);
718    b.u16(0); // layer
719    b.u16(0); // alternate_group
720    b.u16(0); // volume = 0 for video
721    b.u16(0); // reserved
722    write_unity_matrix(&mut b);
723    b.u32(width << 16); // width 16.16
724    b.u32(height << 16);
725    b.finish()
726}
727
728fn build_video_mdia(
729    width: u32,
730    height: u32,
731    timescale: u32,
732    config_obus: &[u8],
733    color_metadata: &ColorMetadata,
734) -> Vec<u8> {
735    let mdhd = build_mdhd(timescale, 0);
736    let hdlr = build_hdlr(b"vide", "VideoHandler\0");
737    let minf = build_video_minf(width, height, config_obus, color_metadata);
738    let mut b = BoxBuilder::new(b"mdia");
739    b.extend(&mdhd);
740    b.extend(&hdlr);
741    b.extend(&minf);
742    b.finish()
743}
744
745fn build_mdhd(timescale: u32, duration: u64) -> Vec<u8> {
746    let mut b = BoxBuilder::new(b"mdhd");
747    b.u8(0);
748    b.extend(&[0, 0, 0]);
749    b.u32(0); // creation_time
750    b.u32(0); // modification_time
751    b.u32(timescale);
752    b.u32(duration as u32);
753    b.u16(0x55c4); // language 'und'
754    b.u16(0); // pre_defined
755    b.finish()
756}
757
758/// Generic handler box — `'vide'` for video, `'soun'` for audio. The
759/// human-readable name string (with trailing NUL) is purely
760/// informational; ffprobe surfaces it but no playback path consumes it.
761fn build_hdlr(handler_type: &[u8; 4], name: &str) -> Vec<u8> {
762    let mut b = BoxBuilder::new(b"hdlr");
763    b.u8(0);
764    b.extend(&[0, 0, 0]);
765    b.u32(0); // pre_defined
766    b.extend(handler_type);
767    b.u32(0);
768    b.u32(0);
769    b.u32(0); // reserved[3]
770    b.extend(name.as_bytes());
771    b.finish()
772}
773
774fn build_video_minf(
775    width: u32,
776    height: u32,
777    config_obus: &[u8],
778    color_metadata: &ColorMetadata,
779) -> Vec<u8> {
780    let vmhd = build_vmhd();
781    let dinf = build_dinf();
782    let stbl = build_video_stbl_empty(width, height, config_obus, color_metadata);
783    let mut b = BoxBuilder::new(b"minf");
784    b.extend(&vmhd);
785    b.extend(&dinf);
786    b.extend(&stbl);
787    b.finish()
788}
789
790fn build_vmhd() -> Vec<u8> {
791    let mut b = BoxBuilder::new(b"vmhd");
792    b.u8(0);
793    b.extend(&[0, 0, 0x01]); // flags = 1 per spec
794    b.u16(0); // graphicsmode (0 = copy)
795    b.u16(0);
796    b.u16(0);
797    b.u16(0); // opcolor[3] (RGB, 0,0,0)
798    b.finish()
799}
800
801fn build_smhd() -> Vec<u8> {
802    let mut b = BoxBuilder::new(b"smhd");
803    b.u8(0);
804    b.extend(&[0, 0, 0]);
805    b.u16(0); // balance (0 = center)
806    b.u16(0); // reserved
807    b.finish()
808}
809
810/// `dinf` containing a minimal `dref` with one `url ` self-reference.
811/// Required by 14496-12 even when sample data is in the same file.
812fn build_dinf() -> Vec<u8> {
813    let url = {
814        let mut b = BoxBuilder::new(b"url ");
815        b.u8(0); // version
816        b.extend(&[0, 0, 0x01]); // flags = 1 (data is in the same file)
817        b.finish()
818    };
819    let dref = {
820        let mut b = BoxBuilder::new(b"dref");
821        b.u8(0);
822        b.extend(&[0, 0, 0]);
823        b.u32(1); // entry_count
824        b.extend(&url);
825        b.finish()
826    };
827    let mut b = BoxBuilder::new(b"dinf");
828    b.extend(&dref);
829    b.finish()
830}
831
832/// Empty sample tables for a CMAF video init: `stsd` has the av01
833/// sample entry (with av1C, colr, optional mdcv/clli) and the rest of
834/// the tables are empty boxes (entry_count=0).
835fn build_video_stbl_empty(
836    width: u32,
837    height: u32,
838    config_obus: &[u8],
839    color_metadata: &ColorMetadata,
840) -> Vec<u8> {
841    let av01 = build_av01(width, height, config_obus, color_metadata);
842    let stsd = {
843        let mut b = BoxBuilder::new(b"stsd");
844        b.u8(0);
845        b.extend(&[0, 0, 0]);
846        b.u32(1); // entry_count
847        b.extend(&av01);
848        b.finish()
849    };
850    let stts = build_empty_full_box(b"stts");
851    let stsc = build_empty_full_box(b"stsc");
852    let stsz = {
853        let mut b = BoxBuilder::new(b"stsz");
854        b.u8(0);
855        b.extend(&[0, 0, 0]);
856        b.u32(0); // sample_size = 0 → variable, per stsz (then sample_count must be 0 too)
857        b.u32(0); // sample_count = 0
858        b.finish()
859    };
860    let stco = build_empty_full_box(b"stco");
861
862    let mut b = BoxBuilder::new(b"stbl");
863    b.extend(&stsd);
864    b.extend(&stts);
865    b.extend(&stsc);
866    b.extend(&stsz);
867    b.extend(&stco);
868    b.finish()
869}
870
871fn build_audio_trak(info: &AudioInfo, track_id: u32) -> Vec<u8> {
872    let tkhd = build_audio_tkhd(track_id);
873    let mdia = build_audio_mdia(info);
874    let mut b = BoxBuilder::new(b"trak");
875    b.extend(&tkhd);
876    b.extend(&mdia);
877    b.finish()
878}
879
880fn build_audio_tkhd(track_id: u32) -> Vec<u8> {
881    let mut b = BoxBuilder::new(b"tkhd");
882    b.u8(0);
883    b.extend(&[0, 0, 0x03]);
884    b.u32(0);
885    b.u32(0);
886    b.u32(track_id);
887    b.u32(0);
888    b.u32(0);
889    b.u32(0);
890    b.u32(0);
891    b.u16(0); // layer
892    b.u16(0); // alternate_group (audio init has only one track; 0 fine)
893    b.u16(0x0100); // volume 1.0
894    b.u16(0); // reserved
895    write_unity_matrix(&mut b);
896    b.u32(0);
897    b.u32(0); // width / height = 0
898    b.finish()
899}
900
901fn build_audio_mdia(info: &AudioInfo) -> Vec<u8> {
902    let mdhd = build_mdhd(info.timescale, 0);
903    let hdlr = build_hdlr(b"soun", "SoundHandler\0");
904    let minf = build_audio_minf(info);
905    let mut b = BoxBuilder::new(b"mdia");
906    b.extend(&mdhd);
907    b.extend(&hdlr);
908    b.extend(&minf);
909    b.finish()
910}
911
912fn build_audio_minf(info: &AudioInfo) -> Vec<u8> {
913    let smhd = build_smhd();
914    let dinf = build_dinf();
915    let stbl = build_audio_stbl_empty(info);
916    let mut b = BoxBuilder::new(b"minf");
917    b.extend(&smhd);
918    b.extend(&dinf);
919    b.extend(&stbl);
920    b.finish()
921}
922
923fn build_audio_stbl_empty(info: &AudioInfo) -> Vec<u8> {
924    let stsd = build_audio_stsd(info);
925    let stts = build_empty_full_box(b"stts");
926    let stsc = build_empty_full_box(b"stsc");
927    let stsz = {
928        let mut b = BoxBuilder::new(b"stsz");
929        b.u8(0);
930        b.extend(&[0, 0, 0]);
931        b.u32(0);
932        b.u32(0);
933        b.finish()
934    };
935    let stco = build_empty_full_box(b"stco");
936
937    let mut b = BoxBuilder::new(b"stbl");
938    b.extend(&stsd);
939    b.extend(&stts);
940    b.extend(&stsc);
941    b.extend(&stsz);
942    b.extend(&stco);
943    b.finish()
944}
945
946/// Empty FullBox with version 0 + flags 0 + entry_count 0. Layout:
947///   size:u32 = 16 | type | version:u8 = 0 | flags:u24 = 0 | entry_count:u32 = 0
948fn build_empty_full_box(box_type: &[u8; 4]) -> Vec<u8> {
949    let mut b = BoxBuilder::new(box_type);
950    b.u8(0);
951    b.extend(&[0, 0, 0]);
952    b.u32(0);
953    b.finish()
954}
955
956// =====================================================================
957// Stateful per-rendition segmenter (Phase 1.3 + 1.4)
958// =====================================================================
959//
960// Each `CmafVideoMuxer` (one per video rendition) and `CmafAudioMuxer`
961// (one per audio rendition; usually a single instance per asset)
962// accumulates encoded packets in memory and flushes them to disk as
963// CMAF media segments (`seg-NNNNN.m4s` = `moof + mdat`) on demand.
964//
965// Memory ceiling: at most one segment's worth of payload bytes are
966// held at a time (caller flushes at every keyframe boundary that
967// crosses the segment-duration target). For a 4-second 1080p AV1
968// segment at ~3 Mbps that's ~1.5 MB; not a concern at the per-job
969// 4 GiB ceiling.
970//
// The init segment (`init.mp4`) is written LAZILY for both track
// types, on the first `flush_segment` / `finalize` call. Video has to
// wait (we need the first packet's OBU sequence header to build the
// av1C config record); audio has everything it needs at construction
// but defers the write the same way.
975
/// Description of one media segment that has been written to disk.
///
/// Returned by [`CmafVideoMuxer::flush_segment`] and
/// [`CmafAudioMuxer::flush_segment`]. The HLS playlist writer (Phase 3)
/// and the segment-alignment validator (Phase 5) consume these records.
#[derive(Debug, Clone)]
pub struct SegmentInfo {
    /// Sequence number of the segment within its track (1-based,
    /// monotonically increasing).
    pub sequence_number: u32,
    /// On-disk location of the `seg-NNNNN.m4s` file.
    pub path: PathBuf,
    /// Size of the whole file in bytes: moof + mdat header + payload.
    pub byte_size: u64,
    /// Sum of the per-sample durations, in track-timescale ticks.
    /// Dividing by the timescale gives the value for the HLS `EXTINF`
    /// line.
    pub duration_ticks: u64,
}
992
993/// Output of a finalized track muxer: where the init segment lives,
994/// the ordered list of media segments, and the timescale needed to
995/// convert `duration_ticks` to seconds.
996#[derive(Debug, Clone)]
997pub struct CmafTrackManifest {
998    pub init_path: PathBuf,
999    pub segments: Vec<SegmentInfo>,
1000    pub timescale: u32,
1001}
1002
1003impl CmafTrackManifest {
1004    /// Total duration across all segments, in seconds.
1005    pub fn duration_seconds(&self) -> f64 {
1006        let total_ticks: u64 = self.segments.iter().map(|s| s.duration_ticks).sum();
1007        total_ticks as f64 / self.timescale as f64
1008    }
1009}
1010
/// A video sample buffered by the muxer until the next segment flush.
struct PendingVideoSample {
    /// Encoded sample bytes, written verbatim into the segment's `mdat`.
    payload: Vec<u8>,
    /// Sample duration in track-timescale ticks.
    duration: u32,
    /// Whether the caller marked this sample as a sync sample.
    is_keyframe: bool,
}
1017
/// An audio sample buffered by the muxer until the next segment flush.
struct PendingAudioSample {
    /// Encoded sample bytes, written verbatim into the segment's `mdat`.
    payload: Vec<u8>,
    /// Sample duration in track-timescale ticks.
    duration: u32,
}
1023
1024/// Stateful CMAF video segmenter for one AV1 rendition.
1025///
1026/// Driven by the pipeline:
1027/// 1. Construct with rendition dimensions + output dir + timescale.
1028/// 2. Call `add_packet` for each encoded packet from the encoder.
1029///    The first packet's OBU stream MUST contain a sequence header;
1030///    the muxer extracts it and uses it for `av1C` in the init.mp4
1031///    (written lazily on the first `flush_segment` call).
1032/// 3. Call `flush_segment` whenever a CMAF fragment boundary is
1033///    reached (the orchestrator decides when based on accumulated
1034///    duration + the segment_duration knob).
1035/// 4. After the last packet is added and flushed, call `finalize`
1036///    to consume the muxer and get the [`CmafTrackManifest`].
1037///
1038/// Segment files are named `seg-00001.m4s`, `seg-00002.m4s`, ...
1039/// in the output dir.
1040pub struct CmafVideoMuxer {
1041    output_dir: PathBuf,
1042    width: u32,
1043    height: u32,
1044    timescale: u32,
1045    color_metadata: ColorMetadata,
1046    track_id: u32,
1047    config_obus: Option<Vec<u8>>, // captured from the first packet
1048    init_path: PathBuf,
1049    init_written: bool,
1050    sequence_number: u32,
1051    base_decode_time: u64,
1052    pending: Vec<PendingVideoSample>,
1053    segments: Vec<SegmentInfo>,
1054}
1055
/// Optional construction parameters for [`CmafVideoMuxer`].
///
/// The defaults reproduce the plain 5-arg `new()` behaviour: write
/// `init.mp4`, number segments from 1, start decode time at 0.
///
/// The multi-GPU helper-task path (see the `pipeline::cmaf` helper
/// variant) uses non-default values: several muxers share one per-rung
/// output directory, each helper starting at its own
/// `first_segment_index` with the matching decode-time offset, and
/// only the primary writing `init.mp4`.
#[derive(Debug, Clone)]
pub struct CmafVideoMuxerOptions {
    /// 1-based index of the segment the first `flush_segment()` will
    /// write, i.e. the first file is
    /// `seg-{first_segment_index:05}.m4s`. Defaults to `1` (the
    /// primary's first segment).
    pub first_segment_index: u32,
    /// Decode time, in track-timescale ticks, of the first sample of
    /// the first segment. Should equal
    /// `(first_segment_index - 1) * segment_duration_ticks` so that
    /// `tfdt` is byte-identical to what the primary would have written
    /// for the same segment index. Defaults to `0`.
    pub first_segment_base_decode_time: u64,
    /// When `false`, neither `flush_segment()` nor `finalize()` writes
    /// `init.mp4`. Meant for helpers whose sibling muxer (typically
    /// the primary) owns the init segment, so they must not race
    /// against it. Defaults to `true`.
    pub write_init_segment: bool,
}

impl Default for CmafVideoMuxerOptions {
    /// Primary-muxer settings: first segment is number 1, decode time
    /// starts at 0, and the init segment is written.
    fn default() -> Self {
        CmafVideoMuxerOptions {
            write_init_segment: true,
            first_segment_index: 1,
            first_segment_base_decode_time: 0,
        }
    }
}
1093
1094impl CmafVideoMuxer {
1095    /// Construct a new video muxer that writes init.mp4 + segments to
1096    /// `output_dir`. Creates the directory if it doesn't exist.
1097    ///
1098    /// Equivalent to `new_with_options(..., CmafVideoMuxerOptions::default())`.
1099    pub fn new(
1100        output_dir: impl AsRef<Path>,
1101        width: u32,
1102        height: u32,
1103        timescale: u32,
1104        color_metadata: ColorMetadata,
1105    ) -> Result<Self> {
1106        Self::new_with_options(
1107            output_dir,
1108            width,
1109            height,
1110            timescale,
1111            color_metadata,
1112            CmafVideoMuxerOptions::default(),
1113        )
1114    }
1115
1116    /// Construct a muxer with non-default options. See
1117    /// [`CmafVideoMuxerOptions`].
1118    ///
1119    /// The helper-task path uses this to attach to an in-progress rung:
1120    /// the helper's muxer starts numbering segments at the helper's
1121    /// claim range start, advances `tfdt` to the corresponding decode
1122    /// time, and skips the init segment write that the primary owns.
1123    pub fn new_with_options(
1124        output_dir: impl AsRef<Path>,
1125        width: u32,
1126        height: u32,
1127        timescale: u32,
1128        color_metadata: ColorMetadata,
1129        options: CmafVideoMuxerOptions,
1130    ) -> Result<Self> {
1131        assert!(
1132            options.first_segment_index >= 1,
1133            "first_segment_index is 1-based; got {}",
1134            options.first_segment_index,
1135        );
1136        let output_dir = output_dir.as_ref().to_path_buf();
1137        fs::create_dir_all(&output_dir)
1138            .with_context(|| format!("creating CMAF video output dir: {}", output_dir.display()))?;
1139        let init_path = output_dir.join("init.mp4");
1140        Ok(Self {
1141            output_dir,
1142            width,
1143            height,
1144            timescale,
1145            color_metadata,
1146            track_id: 1,
1147            config_obus: None,
1148            init_path,
1149            // When write_init_segment is false, mark init as already
1150            // written so `ensure_init_written` is a no-op. The primary
1151            // is expected to have written (or will write) init.mp4
1152            // separately.
1153            init_written: !options.write_init_segment,
1154            // `flush_segment` pre-increments `sequence_number` before
1155            // writing, so the on-disk segment number equals
1156            // `sequence_number` AFTER the increment. To produce
1157            // `seg-{first_segment_index:05}.m4s` as the first output,
1158            // start at `first_segment_index - 1`.
1159            sequence_number: options.first_segment_index - 1,
1160            base_decode_time: options.first_segment_base_decode_time,
1161            pending: Vec::new(),
1162            segments: Vec::new(),
1163        })
1164    }
1165
1166    /// Add one encoded video packet to the current pending segment.
1167    /// `duration` is in track-timescale ticks. `is_keyframe` must be
1168    /// true for IDR / sync-sample packets — the muxer doesn't peek
1169    /// into the OBU stream to figure that out, and a wrong value
1170    /// will produce a CMAF segment that doesn't decode (the spec
1171    /// requires every segment to start with a sync sample).
1172    pub fn add_packet(&mut self, payload: Vec<u8>, duration: u32, is_keyframe: bool) -> Result<()> {
1173        if self.config_obus.is_none() {
1174            self.config_obus = Some(crate::mux::extract_sequence_header(&payload).context(
1175                "extracting AV1 sequence header from first packet for av1C config record",
1176            )?);
1177        }
1178        self.pending.push(PendingVideoSample {
1179            payload,
1180            duration,
1181            is_keyframe,
1182        });
1183        Ok(())
1184    }
1185
1186    /// Whether the muxer is ready to flush a segment that starts on a
1187    /// sync sample. The first sample in `pending` must be a keyframe.
1188    /// CMAF requires every segment to begin with a sync sample
1189    /// (§7.3.2.1), so the orchestrator should ensure this invariant
1190    /// before calling `flush_segment`.
1191    pub fn first_pending_is_keyframe(&self) -> bool {
1192        self.pending.first().is_some_and(|s| s.is_keyframe)
1193    }
1194
1195    /// Total duration of pending samples in track-timescale ticks. The
1196    /// orchestrator uses this to decide when a segment has reached
1197    /// its target duration.
1198    pub fn pending_duration_ticks(&self) -> u64 {
1199        self.pending.iter().map(|s| s.duration as u64).sum()
1200    }
1201
1202    /// View of segments already flushed to disk. Each entry's
1203    /// `sequence_number` is the segment's 1-based index; `path` is
1204    /// the on-disk location. The helper-task path
1205    /// (`pipeline::cmaf::cmaf_transcode_rung_slice`) reads this
1206    /// between `add_packet` calls to detect "did the last add
1207    /// trigger an auto-flush?" — when `segments().len()` grows, the
1208    /// last entry is the newly-flushed segment.
1209    pub fn segments(&self) -> &[SegmentInfo] {
1210        &self.segments
1211    }
1212
1213    /// Drop every sample currently in the pending buffer without
1214    /// writing them to disk. Used by the helper-task path when its
1215    /// claim has been shrunk by an `attach_helper` and the encoder's
1216    /// lookahead would otherwise produce a segment that conflicts
1217    /// with whichever helper now owns that range.
1218    ///
1219    /// Specifically: when a primary's claim is shrunk from `[0..N)`
1220    /// to `[0..K)`, the primary's encoder has already received
1221    /// frames `K*KI..K*KI+lookahead` by the time the claim-shrink
1222    /// is observed at the segment boundary. Those frames belong to
1223    /// the helper that took `[K..N)`. Discarding the muxer pending
1224    /// + dropping the encoder is the cleanest way to ensure no
1225    /// stale segment file is written for the helper's territory.
1226    pub fn clear_pending(&mut self) {
1227        self.pending.clear();
1228    }
1229
1230    /// Flush pending samples to a new media segment file. Writes
1231    /// `init.mp4` first if it hasn't been written yet (the av1C config
1232    /// record needs the first packet's sequence header). Returns the
1233    /// segment's metadata and clears the pending buffer.
1234    ///
1235    /// No-op if `pending` is empty.
1236    pub fn flush_segment(&mut self) -> Result<Option<SegmentInfo>> {
1237        if self.pending.is_empty() {
1238            return Ok(None);
1239        }
1240        if !self.first_pending_is_keyframe() {
1241            anyhow::bail!(
1242                "CMAF segment must start with a sync sample; first pending sample is not a keyframe \
1243                 (segment_number={}, pending_count={})",
1244                self.sequence_number + 1,
1245                self.pending.len()
1246            );
1247        }
1248        self.ensure_init_written()?;
1249
1250        self.sequence_number += 1;
1251        let seq = self.sequence_number;
1252        let samples_meta: Vec<CmafSample> = self
1253            .pending
1254            .iter()
1255            .map(|s| CmafSample {
1256                duration: s.duration,
1257                size: s.payload.len() as u32,
1258                flags: if s.is_keyframe {
1259                    SampleFlags::keyframe()
1260                } else {
1261                    SampleFlags::delta_frame()
1262                },
1263            })
1264            .collect();
1265        let segment_duration: u64 = samples_meta.iter().map(|s| s.duration as u64).sum();
1266
1267        let mut moof = build_moof_video(seq, self.track_id, self.base_decode_time, &samples_meta);
1268        moof.patch_default_no_gap();
1269
1270        let payload_total: u64 = self.pending.iter().map(|s| s.payload.len() as u64).sum();
1271        let mdat_box_size: u64 = 8 + payload_total;
1272        if mdat_box_size > u32::MAX as u64 {
1273            // Above u32::MAX we'd need a `largesize` mdat (16-byte header).
1274            // For 4-second segments at sane bitrates this is impossible; if
1275            // we ever hit it, bail with a clear error rather than silently
1276            // overflowing.
1277            anyhow::bail!(
1278                "CMAF media segment payload {} bytes exceeds 32-bit mdat size limit",
1279                payload_total
1280            );
1281        }
1282
1283        let path = self.output_dir.join(format!("seg-{:05}.m4s", seq));
1284        let file = File::create(&path)
1285            .with_context(|| format!("creating CMAF segment file: {}", path.display()))?;
1286        let mut writer = BufWriter::new(file);
1287        writer.write_all(&moof.bytes).context("writing moof")?;
1288        writer
1289            .write_all(&(mdat_box_size as u32).to_be_bytes())
1290            .context("writing mdat size")?;
1291        writer.write_all(b"mdat").context("writing mdat type")?;
1292        for sample in &self.pending {
1293            writer
1294                .write_all(&sample.payload)
1295                .context("writing mdat payload")?;
1296        }
1297        writer.flush().context("flushing CMAF segment writer")?;
1298        let byte_size = moof.bytes.len() as u64 + mdat_box_size;
1299
1300        self.base_decode_time += segment_duration;
1301        self.pending.clear();
1302
1303        let info = SegmentInfo {
1304            sequence_number: seq,
1305            path,
1306            byte_size,
1307            duration_ticks: segment_duration,
1308        };
1309        self.segments.push(info.clone());
1310        Ok(Some(info))
1311    }
1312
1313    /// Finalize the muxer: ensures the init segment is on disk (covers
1314    /// the edge case where add_packet was called but flush_segment
1315    /// never was — e.g. an empty source), drops any non-flushed
1316    /// pending samples (caller should have flushed them), and returns
1317    /// the manifest.
1318    pub fn finalize(mut self) -> Result<CmafTrackManifest> {
1319        if !self.pending.is_empty() {
1320            // Flush whatever's left. The caller should have done this
1321            // explicitly; we cover them defensively.
1322            self.flush_segment()?;
1323        }
1324        self.ensure_init_written()?;
1325        Ok(CmafTrackManifest {
1326            init_path: self.init_path,
1327            segments: self.segments,
1328            timescale: self.timescale,
1329        })
1330    }
1331
1332    fn ensure_init_written(&mut self) -> Result<()> {
1333        if self.init_written {
1334            return Ok(());
1335        }
1336        let config = self.config_obus.as_ref().ok_or_else(|| {
1337            anyhow::anyhow!(
1338                "cannot write CMAF video init segment: no AV1 sequence header has been observed yet \
1339                 (must call add_packet at least once before flush_segment / finalize)"
1340            )
1341        })?;
1342        let init = build_init_segment_video(
1343            self.width,
1344            self.height,
1345            self.timescale,
1346            config,
1347            &self.color_metadata,
1348        );
1349        let mut file = File::create(&self.init_path).with_context(|| {
1350            format!(
1351                "creating CMAF video init segment: {}",
1352                self.init_path.display()
1353            )
1354        })?;
1355        file.write_all(&init)
1356            .context("writing CMAF video init segment bytes")?;
1357        file.flush().context("flushing CMAF video init segment")?;
1358        self.init_written = true;
1359        Ok(())
1360    }
1361}
1362
1363/// Stateful CMAF audio segmenter. Same model as the video muxer but
1364/// simpler — every audio sample is independently decodable, so there's
1365/// no first-sample-flags / sync-boundary requirement.
1366pub struct CmafAudioMuxer {
1367    output_dir: PathBuf,
1368    info: AudioInfo,
1369    track_id: u32,
1370    init_path: PathBuf,
1371    init_written: bool,
1372    sequence_number: u32,
1373    base_decode_time: u64,
1374    pending: Vec<PendingAudioSample>,
1375    segments: Vec<SegmentInfo>,
1376}
1377
1378impl CmafAudioMuxer {
1379    pub fn new(output_dir: impl AsRef<Path>, info: AudioInfo) -> Result<Self> {
1380        let output_dir = output_dir.as_ref().to_path_buf();
1381        fs::create_dir_all(&output_dir)
1382            .with_context(|| format!("creating CMAF audio output dir: {}", output_dir.display()))?;
1383        let init_path = output_dir.join("init.mp4");
1384        Ok(Self {
1385            output_dir,
1386            info,
1387            track_id: 1,
1388            init_path,
1389            init_written: false,
1390            sequence_number: 0,
1391            base_decode_time: 0,
1392            pending: Vec::new(),
1393            segments: Vec::new(),
1394        })
1395    }
1396
1397    pub fn add_packet(&mut self, payload: Vec<u8>, duration: u32) -> Result<()> {
1398        self.pending.push(PendingAudioSample { payload, duration });
1399        Ok(())
1400    }
1401
1402    pub fn pending_duration_ticks(&self) -> u64 {
1403        self.pending.iter().map(|s| s.duration as u64).sum()
1404    }
1405
1406    pub fn flush_segment(&mut self) -> Result<Option<SegmentInfo>> {
1407        if self.pending.is_empty() {
1408            return Ok(None);
1409        }
1410        self.ensure_init_written()?;
1411
1412        self.sequence_number += 1;
1413        let seq = self.sequence_number;
1414        let samples_meta: Vec<CmafSample> = self
1415            .pending
1416            .iter()
1417            .map(|s| CmafSample {
1418                duration: s.duration,
1419                size: s.payload.len() as u32,
1420                flags: SampleFlags::keyframe(),
1421            })
1422            .collect();
1423        let segment_duration: u64 = samples_meta.iter().map(|s| s.duration as u64).sum();
1424
1425        let mut moof = build_moof_audio(seq, self.track_id, self.base_decode_time, &samples_meta);
1426        moof.patch_default_no_gap();
1427
1428        let payload_total: u64 = self.pending.iter().map(|s| s.payload.len() as u64).sum();
1429        let mdat_box_size: u64 = 8 + payload_total;
1430        if mdat_box_size > u32::MAX as u64 {
1431            anyhow::bail!(
1432                "CMAF audio media segment payload {} bytes exceeds 32-bit mdat size limit",
1433                payload_total
1434            );
1435        }
1436
1437        let path = self.output_dir.join(format!("seg-{:05}.m4s", seq));
1438        let file = File::create(&path)
1439            .with_context(|| format!("creating CMAF audio segment file: {}", path.display()))?;
1440        let mut writer = BufWriter::new(file);
1441        writer
1442            .write_all(&moof.bytes)
1443            .context("writing audio moof")?;
1444        writer
1445            .write_all(&(mdat_box_size as u32).to_be_bytes())
1446            .context("writing audio mdat size")?;
1447        writer
1448            .write_all(b"mdat")
1449            .context("writing audio mdat type")?;
1450        for sample in &self.pending {
1451            writer
1452                .write_all(&sample.payload)
1453                .context("writing audio mdat payload")?;
1454        }
1455        writer
1456            .flush()
1457            .context("flushing CMAF audio segment writer")?;
1458        let byte_size = moof.bytes.len() as u64 + mdat_box_size;
1459
1460        self.base_decode_time += segment_duration;
1461        self.pending.clear();
1462
1463        let info = SegmentInfo {
1464            sequence_number: seq,
1465            path,
1466            byte_size,
1467            duration_ticks: segment_duration,
1468        };
1469        self.segments.push(info.clone());
1470        Ok(Some(info))
1471    }
1472
1473    pub fn finalize(mut self) -> Result<CmafTrackManifest> {
1474        if !self.pending.is_empty() {
1475            self.flush_segment()?;
1476        }
1477        self.ensure_init_written()?;
1478        let timescale = self.info.timescale;
1479        Ok(CmafTrackManifest {
1480            init_path: self.init_path,
1481            segments: self.segments,
1482            timescale,
1483        })
1484    }
1485
1486    fn ensure_init_written(&mut self) -> Result<()> {
1487        if self.init_written {
1488            return Ok(());
1489        }
1490        let init = build_init_segment_audio(&self.info);
1491        let mut file = File::create(&self.init_path).with_context(|| {
1492            format!(
1493                "creating CMAF audio init segment: {}",
1494                self.init_path.display()
1495            )
1496        })?;
1497        file.write_all(&init)
1498            .context("writing CMAF audio init segment bytes")?;
1499        file.flush().context("flushing CMAF audio init segment")?;
1500        self.init_written = true;
1501        Ok(())
1502    }
1503}
1504
1505#[cfg(test)]
1506mod tests {
1507    use super::*;
1508
1509    fn read_be_u32(buf: &[u8], pos: usize) -> u32 {
1510        u32::from_be_bytes(buf[pos..pos + 4].try_into().unwrap())
1511    }
1512
1513    fn read_be_u64(buf: &[u8], pos: usize) -> u64 {
1514        u64::from_be_bytes(buf[pos..pos + 8].try_into().unwrap())
1515    }
1516
1517    fn box_size_and_type(buf: &[u8]) -> (u32, &[u8]) {
1518        let size = read_be_u32(buf, 0);
1519        let kind = &buf[4..8];
1520        (size, kind)
1521    }
1522
1523    #[test]
1524    fn mfhd_layout_is_16_bytes_with_sequence_number() {
1525        let bytes = build_mfhd(42);
1526        assert_eq!(bytes.len(), 16);
1527        let (size, kind) = box_size_and_type(&bytes);
1528        assert_eq!(size, 16);
1529        assert_eq!(kind, b"mfhd");
1530        assert_eq!(bytes[8], 0); // version
1531        assert_eq!(&bytes[9..12], &[0, 0, 0]); // flags
1532        assert_eq!(read_be_u32(&bytes, 12), 42);
1533    }
1534
1535    #[test]
1536    fn tfhd_minimal_track_id_only_is_16_bytes() {
1537        let bytes = build_tfhd(1, None, None, None);
1538        // 8 (header) + 1 (version) + 3 (flags) + 4 (track_id) = 16.
1539        assert_eq!(bytes.len(), 16);
1540        let (size, kind) = box_size_and_type(&bytes);
1541        assert_eq!(size, 16);
1542        assert_eq!(kind, b"tfhd");
1543        // tf_flags should ONLY have default-base-is-moof (0x020000) set.
1544        let flag_bytes = [0u8, bytes[9], bytes[10], bytes[11]];
1545        let flags = u32::from_be_bytes(flag_bytes);
1546        assert_eq!(flags, 0x020000);
1547        assert_eq!(read_be_u32(&bytes, 12), 1);
1548    }
1549
1550    #[test]
1551    fn tfhd_with_default_flags_only_packs_correct_bits() {
1552        let bytes = build_tfhd(1, None, None, Some(SampleFlags::delta_frame().pack()));
1553        // 8 header + 1 version + 3 flags + 4 track_id + 4 default_sample_flags = 20.
1554        assert_eq!(bytes.len(), 20);
1555        let flag_bytes = [0u8, bytes[9], bytes[10], bytes[11]];
1556        let flags = u32::from_be_bytes(flag_bytes);
1557        // default-base-is-moof (0x020000) | default-sample-flags (0x000020).
1558        assert_eq!(flags, 0x020020);
1559        assert_eq!(read_be_u32(&bytes, 12), 1);
1560        assert_eq!(read_be_u32(&bytes, 16), SampleFlags::delta_frame().pack());
1561    }
1562
1563    #[test]
1564    fn tfhd_with_all_defaults_packs_in_spec_order() {
1565        let bytes = build_tfhd(1, Some(1024), Some(2048), Some(0x01010000));
1566        // 8 + 1 + 3 + 4 + 4 + 4 + 4 = 28.
1567        assert_eq!(bytes.len(), 28);
1568        let flag_bytes = [0u8, bytes[9], bytes[10], bytes[11]];
1569        let flags = u32::from_be_bytes(flag_bytes);
1570        // default-base-is-moof (0x020000) | dur (0x000008) | size (0x000010) | flags (0x000020).
1571        assert_eq!(flags, 0x020038);
1572        assert_eq!(read_be_u32(&bytes, 12), 1);
1573        assert_eq!(read_be_u32(&bytes, 16), 1024); // duration
1574        assert_eq!(read_be_u32(&bytes, 20), 2048); // size
1575        assert_eq!(read_be_u32(&bytes, 24), 0x01010000); // flags
1576    }
1577
1578    #[test]
1579    fn tfdt_v1_carries_u64_decode_time() {
1580        let bytes = build_tfdt(0x0123_4567_89AB_CDEF);
1581        // 8 header + 1 version + 3 flags + 8 decode_time = 20.
1582        assert_eq!(bytes.len(), 20);
1583        assert_eq!(box_size_and_type(&bytes), (20, b"tfdt".as_slice()));
1584        assert_eq!(bytes[8], 1); // version 1
1585        assert_eq!(read_be_u64(&bytes, 12), 0x0123_4567_89AB_CDEF);
1586    }
1587
1588    #[test]
1589    fn mehd_v1_carries_u64_fragment_duration() {
1590        let bytes = build_mehd(1_000_000);
1591        assert_eq!(bytes.len(), 20);
1592        assert_eq!(box_size_and_type(&bytes), (20, b"mehd".as_slice()));
1593        assert_eq!(bytes[8], 1);
1594        assert_eq!(read_be_u64(&bytes, 12), 1_000_000);
1595    }
1596
1597    #[test]
1598    fn trex_layout_is_32_bytes_with_track_id_and_flags() {
1599        let default_flags = SampleFlags::delta_frame().pack();
1600        let bytes = build_trex(2, default_flags);
1601        // 8 + 1 + 3 + 4 + 4 + 4 + 4 + 4 = 32.
1602        assert_eq!(bytes.len(), 32);
1603        assert_eq!(box_size_and_type(&bytes), (32, b"trex".as_slice()));
1604        assert_eq!(read_be_u32(&bytes, 12), 2); // track_id
1605        assert_eq!(read_be_u32(&bytes, 16), 1); // default_sample_description_index
1606        assert_eq!(read_be_u32(&bytes, 20), 0); // default_sample_duration
1607        assert_eq!(read_be_u32(&bytes, 24), 0); // default_sample_size
1608        assert_eq!(read_be_u32(&bytes, 28), default_flags);
1609    }
1610
1611    #[test]
1612    fn sample_flags_pack_distinguishes_sync_from_delta() {
1613        let sync = SampleFlags::keyframe().pack();
1614        let delta = SampleFlags::delta_frame().pack();
1615        assert_ne!(sync, delta);
1616        // Sync: depends_on=2 in bits 24-25, is_non_sync=0 in bit 16.
1617        assert_eq!(sync, 0x02_00_00_00);
1618        // Delta: depends_on=1, is_non_sync=1.
1619        assert_eq!(delta, 0x01_01_00_00);
1620    }
1621
/// Builds a one-sample video `moof`, patches its data offset, and checks the
/// box nesting (`moof` > `mfhd`, `traf`) plus the two patched/derived `trun`
/// fields.
#[test]
fn moof_video_one_keyframe_sample_round_trip() {
    let fragment = vec![CmafSample {
        duration: 1500,
        size: 4096,
        flags: SampleFlags::keyframe(),
    }];
    let mut moof = build_moof_video(1, 1, 0, &fragment);
    moof.patch_default_no_gap();

    let buf: &[u8] = &moof.bytes;
    let offset_pos = moof.data_offset_pos;

    // The outer box must be a `moof` whose declared size spans the buffer.
    let (moof_size, moof_kind) = box_size_and_type(buf);
    assert_eq!(moof_size as usize, buf.len());
    assert_eq!(moof_kind, b"moof");

    // First child, right after the 8-byte moof header, is a 16-byte mfhd.
    const MOOF_HEADER: usize = 8;
    let (mfhd_size, mfhd_kind) = box_size_and_type(&buf[MOOF_HEADER..]);
    assert_eq!(mfhd_size, 16);
    assert_eq!(mfhd_kind, b"mfhd");
    assert_eq!(read_be_u32(buf, MOOF_HEADER + 12), 1); // sequence_number

    // The traf follows immediately after the mfhd.
    let traf_at = MOOF_HEADER + mfhd_size as usize;
    let (_, traf_kind) = box_size_and_type(&buf[traf_at..]);
    assert_eq!(traf_kind, b"traf");

    // After patching, data_offset should equal moof.len() + 8.
    let data_offset = read_be_u32(buf, offset_pos);
    assert_eq!(data_offset as usize, buf.len() + 8);

    // first_sample_flags sits 4 bytes after data_offset in the trun layout
    // and must carry the keyframe flags.
    let first_sample_flags = read_be_u32(buf, offset_pos + 4);
    assert_eq!(first_sample_flags, SampleFlags::keyframe().pack());
}
1656
/// A three-sample video fragment must record sample_count = 3 in `trun` and
/// one (duration, size) pair per sample, in order.
#[test]
fn moof_video_three_samples_records_per_sample_dur_and_size() {
    let fragment = vec![
        CmafSample {
            duration: 1500,
            size: 4096,
            flags: SampleFlags::keyframe(),
        },
        CmafSample {
            duration: 1500,
            size: 1024,
            flags: SampleFlags::delta_frame(),
        },
        CmafSample {
            duration: 1500,
            size: 1024,
            flags: SampleFlags::delta_frame(),
        },
    ];
    let mut moof = build_moof_video(2, 1, 6000, &fragment);
    moof.patch_default_no_gap();
    let buf: &[u8] = &moof.bytes;

    // Sizes of everything that precedes the trun inside the moof.
    const MOOF_HEADER: usize = 8;
    const MFHD: usize = 16;
    const TRAF_HEADER: usize = 8;
    const TFHD: usize = 20; // 8 + 1 + 3 + 4 (track_id) + 4 (default_flags)
    const TFDT_V1: usize = 20;
    let trun_start = MOOF_HEADER + MFHD + TRAF_HEADER + TFHD + TFDT_V1; // = 72

    let (_, trun_kind) = box_size_and_type(&buf[trun_start..]);
    assert_eq!(trun_kind, b"trun");
    // sample_count follows the 12-byte FullBox header.
    assert_eq!(read_be_u32(buf, trun_start + 12), 3);

    // The per-sample table follows data_offset(4) + first_sample_flags(4):
    //   header(8) + version(1) + flags(3) + count(4) + 4 + 4 = 24.
    let table_start = trun_start + 24;
    let expected = [(1500, 4096), (1500, 1024), (1500, 1024)];
    for (index, &(duration, size)) in expected.iter().enumerate() {
        // Each table entry is a u32 duration followed by a u32 size.
        let entry = table_start + index * 8;
        assert_eq!(read_be_u32(buf, entry), duration);
        assert_eq!(read_be_u32(buf, entry + 4), size);
    }
}
1704
/// The audio `trun` must not set the first-sample-flags bit, so its sample
/// table begins directly after `data_offset`.
#[test]
fn moof_audio_does_not_emit_first_sample_flags() {
    let aac_frame = || CmafSample {
        duration: 1024,
        size: 256,
        flags: SampleFlags::keyframe(),
    };
    let fragment = vec![aac_frame(), aac_frame()];
    let mut moof = build_moof_audio(1, 2, 0, &fragment);
    moof.patch_default_no_gap();

    // moof header(8) + mfhd(16) + traf header(8) + tfhd(20) + tfdt(20).
    let trun_start = 8 + 16 + 8 + 20 + 20;

    // The 24-bit trun flags follow the version byte; widen them to a u32 by
    // leaving the top byte zero.
    let mut widened = [0u8; 4];
    widened[1..].copy_from_slice(&moof.bytes[trun_start + 9..trun_start + 12]);
    let trun_flags = u32::from_be_bytes(widened);
    // Expected: 0x000001 | 0x000100 | 0x000200 = 0x000301
    // (no first-sample-flags bit, no per-sample-flags bit).
    assert_eq!(trun_flags, 0x000001 | 0x000100 | 0x000200);

    // With no first_sample_flags slot the table begins after data_offset:
    //   trun_start + 8 + 1 + 3 + 4 + 4 = trun_start + 20.
    let table_start = trun_start + 20;
    for index in 0..2 {
        let entry = table_start + index * 8;
        assert_eq!(read_be_u32(&moof.bytes, entry), 1024); // sample duration
        assert_eq!(read_be_u32(&moof.bytes, entry + 4), 256); // sample size
    }
}
1743
/// `patch_data_offset` must write its argument at `data_offset_pos`.
#[test]
fn moof_data_offset_patch_is_at_correct_position() {
    // A recognisable sentinel makes a misplaced write obvious.
    const SENTINEL: u32 = 0xDEAD_BEEF;

    // Single-keyframe fragment.
    let fragment = vec![CmafSample {
        duration: 1500,
        size: 1234,
        flags: SampleFlags::keyframe(),
    }];
    let mut moof = build_moof_video(1, 1, 0, &fragment);
    moof.patch_data_offset(SENTINEL);

    assert_eq!(read_be_u32(&moof.bytes, moof.data_offset_pos), SENTINEL);
}
1758
// Minimal synthetic AV1 packet holding exactly one OBU_SEQUENCE_HEADER
// (type=1, has_size=1, ext=0) with a 1-byte payload. This is the shape
// `extract_sequence_header` sniffs out of the first encoded packet to
// build the av1C config record. The payload value (0xAA) is irrelevant
// for the shape tests; the muxer just round-trips it as bytes inside
// av1C.
fn synthetic_seq_header_packet() -> Vec<u8> {
    const OBU_TYPE_SEQUENCE_HEADER: u8 = 1;
    const OBU_HAS_SIZE_FIELD: u8 = 1 << 1;

    // obu_type is shifted into place above the extension flag, which stays 0.
    let obu_header = (OBU_TYPE_SEQUENCE_HEADER << 3) | OBU_HAS_SIZE_FIELD;
    let obu_size = 0x01; // one payload byte follows
    let payload = 0xAA;
    vec![obu_header, obu_size, payload]
}
1769
1770    fn find_box<'a>(buf: &'a [u8], box_type: &[u8; 4]) -> Option<&'a [u8]> {
1771        let mut pos = 0;
1772        while pos + 8 <= buf.len() {
1773            let size = read_be_u32(buf, pos) as usize;
1774            if size < 8 || pos + size > buf.len() {
1775                return None;
1776            }
1777            let kind = &buf[pos + 4..pos + 8];
1778            if kind == box_type {
1779                return Some(&buf[pos..pos + size]);
1780            }
1781            pos += size;
1782        }
1783        None
1784    }
1785
/// Splits the tail of an `ftyp` box into its 4-byte compatible-brand codes.
///
/// The first 16 bytes (size, `ftyp`, major brand, minor version) are
/// skipped; a trailing fragment shorter than 4 bytes is ignored, and a
/// buffer shorter than 16 bytes yields no brands.
fn ftyp_compatible_brands(ftyp: &[u8]) -> Vec<&[u8]> {
    const BRANDS_OFFSET: usize = 16;
    ftyp.get(BRANDS_OFFSET..)
        .unwrap_or_default()
        .chunks_exact(4)
        .collect()
}
1796
/// The video init segment's `ftyp` must list `cmfc`, `av01`, and `iso6`
/// among its compatible brands.
#[test]
fn init_segment_video_lists_cmfc_and_av01_brands() {
    let seq_header = synthetic_seq_header_packet();
    let color = ColorMetadata::default();
    let init = build_init_segment_video(1920, 1080, 30000, &seq_header, &color);

    let ftyp = find_box(&init, b"ftyp").expect("init has ftyp");
    let brands = ftyp_compatible_brands(ftyp);
    let declares = |brand: &[u8; 4]| brands.contains(&&brand[..]);

    assert!(declares(b"cmfc"), "cmfc brand missing: {brands:?}");
    assert!(declares(b"av01"), "av01 brand missing: {brands:?}");
    assert!(declares(b"iso6"), "iso6 brand missing: {brands:?}");
}
1821
/// The audio init segment's `ftyp` must list `cmfa` and must not list the
/// video-only `cmfc` brand.
#[test]
fn init_segment_audio_lists_cmfa_brand() {
    // AudioSpecificConfig for AAC-LC: object_type=2 (LC),
    // sample_rate_index=3 (48 kHz), channelConfiguration=2 (stereo).
    let asc = vec![0x11, 0x90];
    let audio = AudioInfo::aac_lc(48000, 2, asc);
    let init = build_init_segment_audio(&audio);

    let ftyp = find_box(&init, b"ftyp").expect("init has ftyp");
    let brands = ftyp_compatible_brands(ftyp);
    let declares = |brand: &[u8; 4]| brands.contains(&&brand[..]);

    assert!(declares(b"cmfa"), "cmfa brand missing: {brands:?}");
    assert!(!declares(b"cmfc"), "cmfc should not appear in audio init");
}
1839
/// The video init segment's `moov` must carry an `mvex` that contains both
/// a `trex` and an `mehd` child.
#[test]
fn init_segment_video_moov_contains_mvex_with_trex() {
    let seq_header = synthetic_seq_header_packet();
    let color = ColorMetadata::default();
    let init = build_init_segment_video(1280, 720, 30000, &seq_header, &color);

    let moov = find_box(&init, b"moov").expect("init has moov");
    // Children start after the 8-byte container header.
    let mvex = find_box(&moov[8..], b"mvex").expect("moov has mvex");
    let has_child = |kind: &[u8; 4]| find_box(&mvex[8..], kind).is_some();

    assert!(has_child(b"trex"), "mvex must contain trex");
    assert!(has_child(b"mehd"), "mvex must contain mehd");
}
1860
/// In a video init segment the `stbl` sample tables must all be empty,
/// while `stsd` carries exactly one `av01` sample entry.
#[test]
fn init_segment_video_stbl_has_empty_sample_tables() {
    let seq_header = synthetic_seq_header_packet();
    let color = ColorMetadata::default();
    let init = build_init_segment_video(1280, 720, 30000, &seq_header, &color);

    // Descend moov > trak > mdia > minf > stbl, skipping each 8-byte
    // container header to reach its children.
    let mut container = find_box(&init, b"moov").expect("init has moov");
    for (child, missing) in [
        (b"trak", "moov has trak"),
        (b"mdia", "trak has mdia"),
        (b"minf", "mdia has minf"),
        (b"stbl", "minf has stbl"),
    ] {
        container = find_box(&container[8..], child).expect(missing);
    }
    let stbl_children = &container[8..];

    // stsz: header(8) + version(1) + flags(3) + sample_size(4) +
    // sample_count(4) = 20 bytes, with both fields zero.
    let stsz = find_box(stbl_children, b"stsz").expect("stbl has stsz");
    assert_eq!(stsz.len(), 20);
    assert_eq!(read_be_u32(stsz, 12), 0); // sample_size (0 = variable)
    assert_eq!(read_be_u32(stsz, 16), 0); // sample_count

    // stts / stsc / stco: 16-byte FullBox with entry_count = 0.
    for box_type in [b"stts", b"stsc", b"stco"] {
        let table = find_box(stbl_children, box_type).expect("stbl has empty full box");
        assert_eq!(
            table.len(),
            16,
            "{:?} should be 16-byte empty FullBox",
            std::str::from_utf8(box_type).unwrap()
        );
        assert_eq!(read_be_u32(table, 12), 0); // entry_count
    }

    // stsd: exactly one entry, and that entry is the av01 sample entry.
    let stsd = find_box(stbl_children, b"stsd").expect("stbl has stsd");
    assert_eq!(read_be_u32(stsd, 12), 1); // entry_count
    // The entry starts at byte 16; its type code follows its 4-byte size.
    assert_eq!(&stsd[20..24], b"av01");
}
1902
/// End-to-end video muxer check: two packets are flushed into
/// `seg-00001.m4s` (a `moof` followed by an `mdat`), `init.mp4` exists after
/// the first flush, and the manifest reports one 0.1 s segment.
#[test]
fn cmaf_video_muxer_emits_init_then_segment_files() {
    let out_dir = tempfile::tempdir().unwrap();
    let mut muxer =
        CmafVideoMuxer::new(out_dir.path(), 1280, 720, 30000, ColorMetadata::default()).unwrap();

    // Two-packet fragment: a keyframe followed by a delta. Both payloads
    // begin with the synthetic sequence header so the muxer's first-packet
    // OBU sniff succeeds; the remaining bytes are simply carried into mdat.
    let keyframe = [synthetic_seq_header_packet(), vec![0xDE, 0xAD]].concat();
    let delta = synthetic_seq_header_packet();
    muxer.add_packet(keyframe, 1500, true).unwrap();
    muxer.add_packet(delta, 1500, false).unwrap();

    let segment = muxer
        .flush_segment()
        .unwrap()
        .expect("flush emits a segment");
    assert_eq!(segment.sequence_number, 1);
    assert_eq!(segment.duration_ticks, 3000);
    assert!(segment.path.exists());
    assert_eq!(segment.path.file_name().unwrap(), "seg-00001.m4s");

    // init.mp4 is written lazily on the first flush.
    assert!(
        out_dir.path().join("init.mp4").exists(),
        "init.mp4 must exist after first flush"
    );

    // The segment file is a `moof` box immediately followed by an `mdat`.
    let file = std::fs::read(&segment.path).unwrap();
    assert_eq!(&file[4..8], b"moof");
    let mdat_at = read_be_u32(&file, 0) as usize;
    assert_eq!(&file[mdat_at + 4..mdat_at + 8], b"mdat");

    // Nothing is pending at finalize, so the manifest holds just that one
    // segment.
    let manifest = muxer.finalize().unwrap();
    assert_eq!(manifest.segments.len(), 1);
    assert_eq!(manifest.timescale, 30000);
    // 3000 ticks / 30000 Hz = 0.1 s.
    assert!((manifest.duration_seconds() - 0.1).abs() < 1e-6);
}
1946
/// `new()` and `new_with_options(..., default())` must yield byte-identical
/// first-segment output; this is the contract that lets existing call
/// sites stay on `new()` unmodified.
#[test]
fn cmaf_video_muxer_options_default_matches_legacy_new() {
    let legacy_dir = tempfile::tempdir().unwrap();
    let options_dir = tempfile::tempdir().unwrap();

    let mut legacy = CmafVideoMuxer::new(
        legacy_dir.path(),
        1280,
        720,
        30000,
        ColorMetadata::default(),
    )
    .unwrap();
    let mut with_options = CmafVideoMuxer::new_with_options(
        options_dir.path(),
        1280,
        720,
        30000,
        ColorMetadata::default(),
        CmafVideoMuxerOptions::default(),
    )
    .unwrap();

    // Feed the same single keyframe to both muxers.
    let keyframe = [synthetic_seq_header_packet(), vec![0xDE, 0xAD]].concat();
    legacy.add_packet(keyframe.clone(), 1500, true).unwrap();
    with_options.add_packet(keyframe, 1500, true).unwrap();

    let legacy_seg = legacy.flush_segment().unwrap().unwrap();
    let options_seg = with_options.flush_segment().unwrap().unwrap();

    assert_eq!(legacy_seg.sequence_number, options_seg.sequence_number);
    assert_eq!(legacy_seg.duration_ticks, options_seg.duration_ticks);
    assert_eq!(
        legacy_seg.path.file_name().unwrap(),
        options_seg.path.file_name().unwrap(),
    );

    // Byte-identical moof+mdat means there is no observable difference.
    let legacy_bytes = std::fs::read(&legacy_seg.path).unwrap();
    let options_bytes = std::fs::read(&options_seg.path).unwrap();
    assert_eq!(legacy_bytes, options_bytes);

    // Both muxers wrote init.mp4.
    for dir in [&legacy_dir, &options_dir] {
        assert!(dir.path().join("init.mp4").exists());
    }
}
1994
/// A helper muxer attached at segment 5 of an in-progress rung must name
/// its first output `seg-00005.m4s` (not 00001) and continue at 6.
#[test]
fn cmaf_video_muxer_first_segment_index_offset_writes_correct_filename() {
    let out_dir = tempfile::tempdir().unwrap();
    let offset_options = CmafVideoMuxerOptions {
        first_segment_index: 5,
        // 4 prior segments × 3000-tick duration.
        first_segment_base_decode_time: 4 * 3000,
        write_init_segment: true,
    };
    let mut muxer = CmafVideoMuxer::new_with_options(
        out_dir.path(),
        1280,
        720,
        30000,
        ColorMetadata::default(),
        offset_options,
    )
    .unwrap();

    // First fragment: keyframe + delta.
    let first_keyframe = [synthetic_seq_header_packet(), vec![0xCA, 0xFE]].concat();
    muxer.add_packet(first_keyframe, 1500, true).unwrap();
    muxer
        .add_packet(synthetic_seq_header_packet(), 1500, false)
        .unwrap();

    let first = muxer.flush_segment().unwrap().unwrap();
    assert_eq!(
        first.sequence_number, 5,
        "first flush of an offset muxer must produce segment number 5",
    );
    assert_eq!(first.path.file_name().unwrap(), "seg-00005.m4s");

    // Second fragment: the numbering continues at 6.
    let second_keyframe = [synthetic_seq_header_packet(), vec![0xBE, 0xEF]].concat();
    muxer.add_packet(second_keyframe, 1500, true).unwrap();
    let second = muxer.flush_segment().unwrap().unwrap();
    assert_eq!(second.sequence_number, 6);
    assert_eq!(second.path.file_name().unwrap(), "seg-00006.m4s");
}
2036
/// The `tfdt` of an offset muxer's first segment must carry the configured
/// base decode time. Otherwise an HLS player would see segment 5 starting
/// at decode-time 0, producing a buffer underrun at the cut from the
/// primary's segment 4 to the helper's segment 5.
#[test]
fn cmaf_video_muxer_offset_base_decode_time_propagates_to_tfdt() {
    let out_dir = tempfile::tempdir().unwrap();
    let offset_options = CmafVideoMuxerOptions {
        first_segment_index: 5,
        first_segment_base_decode_time: 4 * 3000,
        write_init_segment: true,
    };
    let mut muxer = CmafVideoMuxer::new_with_options(
        out_dir.path(),
        1280,
        720,
        30000,
        ColorMetadata::default(),
        offset_options,
    )
    .unwrap();

    let keyframe = [synthetic_seq_header_packet(), vec![0x01, 0x02]].concat();
    muxer.add_packet(keyframe, 1500, true).unwrap();
    let segment = muxer.flush_segment().unwrap().unwrap();

    // Locate moof > traf > tfdt inside the written segment file.
    let file = std::fs::read(&segment.path).unwrap();
    let moof_len = read_be_u32(&file, 0) as usize;
    let moof = &file[..moof_len];
    let traf = find_box(&moof[8..], b"traf").expect("moof has traf");
    let tfdt = find_box(&traf[8..], b"tfdt").expect("traf has tfdt");

    // tfdt v1 layout:
    //   [0..8)   box header (size + 'tfdt')
    //   [8]      version (= 1), followed by 3 flag bytes
    //   [12..20) base_media_decode_time (u64 BE)
    assert_eq!(tfdt[8], 1, "tfdt should be version 1 (u64 decode time)");
    let mut raw_decode_time = [0u8; 8];
    raw_decode_time.copy_from_slice(&tfdt[12..20]);
    let decode_time = u64::from_be_bytes(raw_decode_time);
    assert_eq!(
        decode_time, 12000,
        "tfdt base_media_decode_time must equal configured offset (4×3000)",
    );
}
2083
/// A helper muxer must NOT write init.mp4, because the primary owns that
/// file. Neither `flush_segment` nor `finalize` may create it in the output
/// directory when `write_init_segment` is false.
#[test]
fn cmaf_video_muxer_write_init_false_skips_init_file() {
    let out_dir = tempfile::tempdir().unwrap();
    let helper_options = CmafVideoMuxerOptions {
        first_segment_index: 5,
        first_segment_base_decode_time: 4 * 3000,
        write_init_segment: false,
    };
    let mut muxer = CmafVideoMuxer::new_with_options(
        out_dir.path(),
        1280,
        720,
        30000,
        ColorMetadata::default(),
        helper_options,
    )
    .unwrap();
    let init_path = out_dir.path().join("init.mp4");

    let keyframe = [synthetic_seq_header_packet(), vec![0x03, 0x04]].concat();
    muxer.add_packet(keyframe, 1500, true).unwrap();
    let segment = muxer.flush_segment().unwrap().unwrap();

    // The media segment is still produced...
    assert!(
        segment.path.exists(),
        "segment file must be written even when init is skipped",
    );
    // ...but no init segment accompanies it.
    assert!(
        !init_path.exists(),
        "init.mp4 must NOT be written when write_init_segment=false",
    );

    // finalize must not write the init segment either.
    let _ = muxer.finalize().unwrap();
    assert!(
        !init_path.exists(),
        "finalize must not retroactively write init.mp4 when disabled",
    );
}
2125
/// The helper-task contract: the primary writes segments 1 and 2 plus
/// init.mp4 into a directory, while a helper (write_init_segment=false)
/// writes segments 3 and 4 into the same directory. After both finalize,
/// all four segment files and init.mp4 exist.
#[test]
fn cmaf_video_muxer_two_writers_share_output_dir_with_distinct_indices() {
    // Feeds one keyframe (sequence header + `tail`) and one delta packet,
    // then flushes them as a single segment.
    fn write_segment(muxer: &mut CmafVideoMuxer, tail: &[u8]) {
        let mut keyframe = synthetic_seq_header_packet();
        keyframe.extend_from_slice(tail);
        muxer.add_packet(keyframe, 1500, true).unwrap();
        muxer
            .add_packet(synthetic_seq_header_packet(), 1500, false)
            .unwrap();
        muxer.flush_segment().unwrap().unwrap();
    }

    let shared_dir = tempfile::tempdir().unwrap();

    let mut primary = CmafVideoMuxer::new(
        shared_dir.path(),
        1280,
        720,
        30000,
        ColorMetadata::default(),
    )
    .unwrap();
    let mut helper = CmafVideoMuxer::new_with_options(
        shared_dir.path(),
        1280,
        720,
        30000,
        ColorMetadata::default(),
        CmafVideoMuxerOptions {
            first_segment_index: 3,
            first_segment_base_decode_time: 2 * 3000,
            write_init_segment: false,
        },
    )
    .unwrap();

    // Primary produces segments 1 and 2.
    for _ in 0..2 {
        write_segment(&mut primary, &[0xAA, 0xBB]);
    }
    // Helper produces segments 3 and 4.
    for _ in 0..2 {
        write_segment(&mut helper, &[0xCC, 0xDD]);
    }

    primary.finalize().unwrap();
    helper.finalize().unwrap();

    // All four segments and the single init.mp4 must be present.
    for seg_idx in 1..=4 {
        let p = shared_dir.path().join(format!("seg-{seg_idx:05}.m4s"));
        assert!(p.exists(), "segment {seg_idx} missing at {}", p.display());
    }
    assert!(
        shared_dir.path().join("init.mp4").exists(),
        "primary's init.mp4 must be present"
    );
}
2188
/// Constructing a muxer with `first_segment_index: 0` must panic with a
/// message containing "first_segment_index is 1-based".
#[test]
#[should_panic(expected = "first_segment_index is 1-based")]
fn cmaf_video_muxer_first_segment_index_zero_panics() {
    let zero_based = CmafVideoMuxerOptions {
        first_segment_index: 0,
        first_segment_base_decode_time: 0,
        write_init_segment: true,
    };
    let out_dir = tempfile::tempdir().unwrap();
    // The constructor is expected to panic; its return value is irrelevant.
    let _ = CmafVideoMuxer::new_with_options(
        out_dir.path(),
        1280,
        720,
        30000,
        ColorMetadata::default(),
        zero_based,
    );
}
2206
/// Flushing a segment whose first packet is not a keyframe must fail with
/// an error mentioning the sync-sample requirement.
#[test]
fn cmaf_video_muxer_rejects_segment_starting_on_non_keyframe() {
    let out_dir = tempfile::tempdir().unwrap();
    let mut muxer =
        CmafVideoMuxer::new(out_dir.path(), 640, 360, 30000, ColorMetadata::default()).unwrap();

    // Queuing the delta packet succeeds; the failure surfaces at flush time.
    let delta_only = synthetic_seq_header_packet();
    muxer.add_packet(delta_only, 1500, false).unwrap();

    let failure = muxer
        .flush_segment()
        .expect_err("must fail when first sample is not sync");
    let message = failure.to_string();
    assert!(message.contains("must start with a sync sample"));
}
2220
/// End-to-end audio muxer check: five AAC frames flush into one segment of
/// 5 × 1024 ticks, init.mp4 is written, and the manifest duration matches.
#[test]
fn cmaf_audio_muxer_emits_init_and_segments_with_correct_durations() {
    let info = AudioInfo {
        codec: "aac".into(),
        sample_rate: 48000,
        channels: 2,
        timescale: 48000,
        // AudioSpecificConfig for AAC-LC: object_type=2, sample_rate_index=3
        // (48 kHz), channelConfiguration=2 (stereo). The previous fixture,
        // [0x12, 0x10], decodes to sample_rate_index=4 (44.1 kHz), which
        // contradicted `sample_rate` above.
        asc_bytes: vec![0x11, 0x90],
        codec_private: vec![],
    };
    let dir = tempfile::tempdir().unwrap();
    let mut muxer = CmafAudioMuxer::new(dir.path(), info).unwrap();

    // 5 AAC frames at 1024 samples each = 5120 ticks @ 48 kHz =
    // ~107 ms total.
    for _ in 0..5 {
        muxer.add_packet(vec![0xDE; 256], 1024).unwrap();
    }
    let seg = muxer
        .flush_segment()
        .unwrap()
        .expect("audio segment emitted");
    assert_eq!(seg.duration_ticks, 5 * 1024);
    assert!(seg.path.exists());
    let init_path = dir.path().join("init.mp4");
    assert!(init_path.exists());

    // The audio trun layout (no first_sample_flags slot) is covered by
    // `moof_audio_does_not_emit_first_sample_flags`; here we only verify
    // that the file starts with a `moof` box.
    let bytes = std::fs::read(&seg.path).unwrap();
    assert_eq!(&bytes[4..8], b"moof");

    let manifest = muxer.finalize().unwrap();
    assert_eq!(manifest.timescale, 48000);
    assert!((manifest.duration_seconds() - (5.0 * 1024.0 / 48000.0)).abs() < 1e-6);
}
2259
/// `build_mvex` must wrap its children in order: `mehd` first, then every
/// `trex` in the order supplied. Both `trex` children are checked by type
/// and by track_id, so a dropped or reordered entry is detected.
#[test]
fn mvex_wraps_mehd_and_one_or_more_trex_in_order() {
    let mehd = build_mehd(10_000);
    let trexes = [
        build_trex(1, SampleFlags::delta_frame().pack()),
        build_trex(2, SampleFlags::keyframe().pack()),
    ];
    let mvex = build_mvex(&mehd, &trexes);

    let (size, kind) = box_size_and_type(&mvex);
    assert_eq!(size as usize, mvex.len());
    assert_eq!(kind, b"mvex");
    // 8 (header) + mehd + trex + trex.
    assert_eq!(
        mvex.len(),
        8 + mehd.len() + trexes[0].len() + trexes[1].len()
    );

    // First child is mehd.
    let (_, child0_kind) = box_size_and_type(&mvex[8..]);
    assert_eq!(child0_kind, b"mehd");

    // Remaining children are the trex boxes, in the order supplied.
    let mut child_at = 8 + mehd.len();
    for (trex, track_id) in trexes.iter().zip([1, 2]) {
        let (_, child_kind) = box_size_and_type(&mvex[child_at..]);
        assert_eq!(child_kind, b"trex");
        // track_id follows the 12-byte FullBox header.
        assert_eq!(read_be_u32(&mvex, child_at + 12), track_id);
        child_at += trex.len();
    }
    // The children must account for every byte of the mvex.
    assert_eq!(child_at, mvex.len());
}
2278}