Skip to main content

container/cmaf/
mod.rs

//! Fragmented MP4 / CMAF box writers.
//!
//! Produces ISO/IEC 14496-12 §8.8 movie-fragment boxes (`moof` / `mfhd` /
//! `traf` / `tfhd` / `tfdt` / `trun`) and the corresponding `mvex` /
//! `mehd` / `trex` declarations that go inside a CMAF init segment's
//! `moov`. CMAF (ISO/IEC 23000-19) constrains the general 14496-12 model:
//! exactly one track per fragment (one `traf` per `moof`), exactly one
//! track per init segment, and a small set of mandatory boxes.
//!
//! This module is the box-level primitive layer. Higher-level callers
//! (`init_segment_video`, `media_segment_video`, etc. in subsequent
//! commits) compose these into init + media segments. The split lets us
//! unit-test each box's byte layout against the spec without having to
//! drive a full encode + segment pipeline.
//!
//! Spec citations are given by section number in the relevant box's doc
//! comment so future readers can cross-check against the standard.
//!
//! # CMAF brand
//!
//! Init segments for video tracks declare the `cmfc` brand (CMAF
//! constraints, per CMAF §7.3.4). Audio tracks use `cmfa`. Both brands
//! coexist in `compatible_brands` alongside the existing `iso6` / `mp42`
//! / `av01` brands so non-CMAF-aware tools that consume the same boxes
//! (e.g. an old ffprobe) can still demux them.
//!
//! # Sample-flags packing
//!
//! `default_sample_flags` (in `trex` / `tfhd`) and `first_sample_flags`
//! / per-sample flags (in `trun`) are packed per ISO/IEC 14496-12
//! §8.8.3.1. The 32 bits are laid out, most-significant bit first:
//!
//! ```text
//!   reserved[4]      = 0
//!   is_leading[2]    = 0
//!   sample_depends_on[2]
//!   sample_is_depended_on[2]
//!   sample_has_redundancy[2]
//!   sample_padding_value[3] = 0
//!   sample_is_non_sync_sample[1]
//!   sample_degradation_priority[16] = 0
//! ```
//!
//! For AV1 / AAC the meaningful values are `sample_depends_on = 1`
//! (this sample depends on others — i.e. P / B / non-IDR) or `2`
//! (independent — i.e. IDR / sync), and `sample_is_non_sync_sample = 1`
//! for non-key frames, `0` for keyframes. The helper
//! [`SampleFlags::pack`] handles this; callers shouldn't compose the
//! u32 by hand.
50
51use anyhow::{Context, Result};
52use codec::frame::{ColorMetadata, VideoCodec};
53use std::fs::{self, File};
54use std::io::{BufWriter, Write};
55use std::path::{Path, PathBuf};
56
57use crate::AudioInfo;
58use crate::mux::{build_avc1, build_avcc, build_hvc1, build_hvcc, extract_sequence_header};
59use crate::nal_mux::{NalMuxCodec, NalSampleWriter};
60
61mod fragment;
62mod init;
63#[cfg(test)]
64mod tests;
65
66pub use fragment::*;
67pub use init::*;
68
69// =====================================================================
70// Shared types (re-used by fragment.rs, init.rs, and the muxers here)
71// =====================================================================
72
/// Four-character CMAF brand codes listed in `ftyp.compatible_brands`.
pub mod brand {
    /// `cmfa` — the CMAF audio constraints brand (CMAF §7.3.5).
    pub const CMFA: &[u8; 4] = b"cmfa";
    /// `cmfc` — the CMAF video constraints brand (CMAF §7.3.4).
    pub const CMFC: &[u8; 4] = b"cmfc";
}
80
/// Which kind of media a CMAF track carries.
///
/// CMAF places a single track in each init segment / fragment, so
/// higher-level orchestration uses this value to choose the codec dispatch
/// path. The init / segment writers expose type-specific entry points and
/// never take this enum themselves, which is why it is marked
/// `#[allow(dead_code)]` until the pipeline orchestrator (Phase 4) wires it
/// through.
#[allow(dead_code)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CmafTrackKind {
    /// The track carries video samples.
    Video,
    /// The track carries audio samples.
    Audio,
}
93
/// Sample flags as packed in `default_sample_flags` / `first_sample_flags` /
/// per-sample `sample_flags` in `trun`. ISO/IEC 14496-12 §8.8.3.1.
///
/// Only the sync / non-sync distinction is modelled. Build a value with
/// [`SampleFlags::keyframe`] (IDR / key samples) or
/// [`SampleFlags::delta_frame`] (samples that depend on earlier ones); there
/// is deliberately no `Default`. Every other field of the wire format
/// (leading samples, redundancy, padding, degradation priority) is always
/// written as zero by [`SampleFlags::pack`].
#[derive(Debug, Clone, Copy)]
pub struct SampleFlags {
    /// `true` for a sync sample (keyframe / IDR), `false` otherwise.
    ///
    /// Note the polarity: this is the *inverse* of the wire-format
    /// `sample_is_non_sync_sample` bit, which is `0` for sync samples.
    pub is_sync: bool,
}

impl SampleFlags {
    /// `sample_depends_on = 2` (does not depend on other samples), bits 24-25.
    const DEPENDS_ON_NONE: u32 = 2 << 24;
    /// `sample_depends_on = 1` (depends on other samples), bits 24-25.
    const DEPENDS_ON_OTHERS: u32 = 1 << 24;
    /// `sample_is_non_sync_sample = 1`, bit 16.
    const NON_SYNC: u32 = 1 << 16;

    /// Pack into the wire-format `u32`.
    ///
    /// * Sync sample: `sample_depends_on = 2`, `sample_is_non_sync_sample = 0`
    ///   → `0x0200_0000`.
    /// * Non-sync sample: `sample_depends_on = 1`,
    ///   `sample_is_non_sync_sample = 1` → `0x0101_0000`.
    pub const fn pack(self) -> u32 {
        if self.is_sync {
            Self::DEPENDS_ON_NONE
        } else {
            Self::DEPENDS_ON_OTHERS | Self::NON_SYNC
        }
    }

    /// Flags for a sync sample (keyframe / IDR).
    pub const fn keyframe() -> Self {
        Self { is_sync: true }
    }

    /// Flags for a non-sync sample (one that depends on earlier samples).
    pub const fn delta_frame() -> Self {
        Self { is_sync: false }
    }
}
130
131/// Per-sample fields written into `trun`. Each entry produces one row
132/// of (duration, size, flags) in the fragment's sample table.
133#[derive(Debug, Clone, Copy)]
134pub struct CmafSample {
135    /// Sample duration in track timescale ticks.
136    pub duration: u32,
137    /// Encoded sample size in bytes.
138    pub size: u32,
139    /// Sample flags (sync / non-sync). The very FIRST sample in a fragment
140    /// uses `first_sample_flags` instead — see `build_trun_video`.
141    pub flags: SampleFlags,
142}
143
144// =====================================================================
145// Stateful per-rendition segmenter types
146// =====================================================================
147
/// Per-segment metadata returned by [`CmafVideoMuxer::flush_segment`] /
/// [`CmafAudioMuxer::flush_segment`]. These records form the input to
/// the HLS playlist writer (Phase 3) and the segment-alignment validator
/// (Phase 5).
#[derive(Debug, Clone)]
pub struct SegmentInfo {
    /// 1-based monotonically increasing sequence number per track.
    pub sequence_number: u32,
    /// Path of the `seg-NNNNN.m4s` file on disk.
    pub path: PathBuf,
    /// Total file size in bytes (moof + mdat header + payload).
    pub byte_size: u64,
    /// Sum of per-sample durations in track-timescale ticks. The HLS
    /// `EXTINF` line is written from this divided by the timescale.
    pub duration_ticks: u64,
}

/// Output of a finalized track muxer: where the init segment lives,
/// the ordered list of media segments, and the timescale needed to
/// convert `duration_ticks` to seconds.
#[derive(Debug, Clone)]
pub struct CmafTrackManifest {
    /// Location of the track's `init.mp4`.
    pub init_path: PathBuf,
    /// Media segments in the order they were flushed.
    pub segments: Vec<SegmentInfo>,
    /// Track timescale in ticks per second.
    pub timescale: u32,
}

impl CmafTrackManifest {
    /// Total duration across all segments, in seconds.
    ///
    /// Returns `0.0` when `timescale` is zero. A zero timescale is not a
    /// valid track timescale, and dividing by it would yield `NaN` / `inf`,
    /// which must never reach a playlist.
    pub fn duration_seconds(&self) -> f64 {
        if self.timescale == 0 {
            return 0.0;
        }
        let total_ticks: u64 = self.segments.iter().map(|s| s.duration_ticks).sum();
        total_ticks as f64 / f64::from(self.timescale)
    }
}
182
/// A video sample held in the muxer's per-segment buffer until the segment
/// is flushed.
struct PendingVideoSample {
    /// Encoded sample bytes, written into `mdat` as-is.
    payload: Vec<u8>,
    /// Sample duration in track-timescale ticks.
    duration: u32,
    /// Whether the sample is a sync sample (keyframe).
    is_keyframe: bool,
}
189
/// An audio sample held in the muxer's per-segment buffer until the segment
/// is flushed.
struct PendingAudioSample {
    /// Encoded sample bytes, written into `mdat` as-is.
    payload: Vec<u8>,
    /// Sample duration in track-timescale ticks.
    duration: u32,
}
195
196// =====================================================================
197// CmafVideoMuxer
198// =====================================================================
199
/// Optional construction parameters for [`CmafVideoMuxer`].
///
/// The [`Default`] value reproduces the behaviour of the original 5-arg
/// `new()`: `init.mp4` is written, segment numbering starts at 1, and the
/// first decode time is 0.
///
/// Non-default values exist for the multi-GPU helper-task path (see the
/// `pipeline::cmaf` helper variant). When several muxers share one per-rung
/// output directory, each helper's muxer begins at a `first_segment_index`
/// other than 1 with the matching decode-time offset, and only the primary
/// writes `init.mp4`.
#[derive(Clone, Debug)]
pub struct CmafVideoMuxerOptions {
    /// 1-based index of the segment the first `flush_segment()` writes; the
    /// file is named `seg-{first_segment_index:05}.m4s`. Default: `1`
    /// (the primary's first segment).
    pub first_segment_index: u32,
    /// Decode time, in track-timescale ticks, of the first sample of the
    /// muxer's first segment. Should equal
    /// `(first_segment_index - 1) * segment_duration_ticks` so that `tfdt`
    /// is byte-identical to what the primary would produce for the same
    /// segment index. Default: `0`.
    pub first_segment_base_decode_time: u64,
    /// When `false`, `flush_segment()` and `finalize()` do not write
    /// `init.mp4`. Use this when a sibling muxer (typically the primary)
    /// owns the init segment and helpers must not race against it.
    /// Default: `true`.
    pub write_init_segment: bool,
}

impl Default for CmafVideoMuxerOptions {
    /// Options for a stand-alone (primary) muxer: first segment index 1,
    /// decode time 0, init segment written.
    fn default() -> Self {
        CmafVideoMuxerOptions {
            write_init_segment: true,
            first_segment_base_decode_time: 0,
            first_segment_index: 1,
        }
    }
}
237
238/// Stateful CMAF video segmenter for one AV1 rendition.
239///
240/// Driven by the pipeline:
241/// 1. Construct with rendition dimensions + output dir + timescale.
242/// 2. Call `add_packet` for each encoded packet from the encoder.
243///    The first packet's OBU stream MUST contain a sequence header;
244///    the muxer extracts it and uses it for `av1C` in the init.mp4
245///    (written lazily on the first `flush_segment` call).
246/// 3. Call `flush_segment` whenever a CMAF fragment boundary is
247///    reached (the orchestrator decides when based on accumulated
248///    duration + the segment_duration knob).
249/// 4. After the last packet is added and flushed, call `finalize`
250///    to consume the muxer and get the [`CmafTrackManifest`].
251///
252/// Segment files are named `seg-00001.m4s`, `seg-00002.m4s`, ...
253/// in the output dir.
254pub struct CmafVideoMuxer {
255    output_dir: PathBuf,
256    width: u32,
257    height: u32,
258    timescale: u32,
259    color_metadata: ColorMetadata,
260    track_id: u32,
261    /// Output codec. `Av1` stores OBUs verbatim + builds `av01`/`av1C`;
262    /// `H264`/`H265` repackage Annex-B → length-prefixed via `nal_writer` and
263    /// build `avc3`/`hev1` init segments with inline parameter sets.
264    codec: VideoCodec,
265    /// AV1 only: the OBU sequence header captured from the first packet.
266    config_obus: Option<Vec<u8>>,
267    /// H.264/H.265 only: Annex-B → length-prefixed repackaging + SPS/PPS(/VPS)
268    /// capture (inline mode — each segment self-describes; `avc3`/`hev1`).
269    nal_writer: Option<NalSampleWriter>,
270    init_path: PathBuf,
271    init_written: bool,
272    sequence_number: u32,
273    base_decode_time: u64,
274    pending: Vec<PendingVideoSample>,
275    segments: Vec<SegmentInfo>,
276}
277
278impl CmafVideoMuxer {
279    /// Construct a new video muxer that writes init.mp4 + segments to
280    /// `output_dir`. Creates the directory if it doesn't exist.
281    ///
282    /// Equivalent to `new_with_options(..., CmafVideoMuxerOptions::default())`.
283    pub fn new(
284        output_dir: impl AsRef<Path>,
285        width: u32,
286        height: u32,
287        timescale: u32,
288        color_metadata: ColorMetadata,
289    ) -> Result<Self> {
290        Self::new_with_options(
291            output_dir,
292            width,
293            height,
294            timescale,
295            color_metadata,
296            CmafVideoMuxerOptions::default(),
297        )
298    }
299
300    /// Construct a muxer with non-default options. See
301    /// [`CmafVideoMuxerOptions`].
302    ///
303    /// The helper-task path uses this to attach to an in-progress rung:
304    /// the helper's muxer starts numbering segments at the helper's
305    /// claim range start, advances `tfdt` to the corresponding decode
306    /// time, and skips the init segment write that the primary owns.
307    pub fn new_with_options(
308        output_dir: impl AsRef<Path>,
309        width: u32,
310        height: u32,
311        timescale: u32,
312        color_metadata: ColorMetadata,
313        options: CmafVideoMuxerOptions,
314    ) -> Result<Self> {
315        Self::new_with_codec_options(
316            output_dir,
317            width,
318            height,
319            timescale,
320            color_metadata,
321            VideoCodec::Av1,
322            options,
323        )
324    }
325
326    /// Codec-aware constructor. `Av1` matches the legacy behaviour; `H264` /
327    /// `H265` build `avc3` / `hev1` init segments and repackage the encoder's
328    /// Annex-B packets into length-prefixed samples with inline parameter sets
329    /// (each segment self-describes — robust across the multi-GPU helper path).
330    pub fn new_with_codec_options(
331        output_dir: impl AsRef<Path>,
332        width: u32,
333        height: u32,
334        timescale: u32,
335        color_metadata: ColorMetadata,
336        codec: VideoCodec,
337        options: CmafVideoMuxerOptions,
338    ) -> Result<Self> {
339        assert!(
340            options.first_segment_index >= 1,
341            "first_segment_index is 1-based; got {}",
342            options.first_segment_index,
343        );
344        let output_dir = output_dir.as_ref().to_path_buf();
345        fs::create_dir_all(&output_dir)
346            .with_context(|| format!("creating CMAF video output dir: {}", output_dir.display()))?;
347        let init_path = output_dir.join("init.mp4");
348        // H.264/H.265 use inline parameter sets (avc3/hev1) so each segment —
349        // and each independently-encoded multi-GPU chunk — self-describes.
350        let nal_writer = match codec {
351            VideoCodec::Av1 => None,
352            VideoCodec::H264 => Some(NalSampleWriter::new_inline(NalMuxCodec::H264)),
353            VideoCodec::H265 => Some(NalSampleWriter::new_inline(NalMuxCodec::H265)),
354        };
355        Ok(Self {
356            output_dir,
357            width,
358            height,
359            timescale,
360            color_metadata,
361            track_id: 1,
362            codec,
363            config_obus: None,
364            nal_writer,
365            init_path,
366            // When write_init_segment is false, mark init as already
367            // written so `ensure_init_written` is a no-op. The primary
368            // is expected to have written (or will write) init.mp4
369            // separately.
370            init_written: !options.write_init_segment,
371            // `flush_segment` pre-increments `sequence_number` before
372            // writing, so the on-disk segment number equals
373            // `sequence_number` AFTER the increment. To produce
374            // `seg-{first_segment_index:05}.m4s` as the first output,
375            // start at `first_segment_index - 1`.
376            sequence_number: options.first_segment_index - 1,
377            base_decode_time: options.first_segment_base_decode_time,
378            pending: Vec::new(),
379            segments: Vec::new(),
380        })
381    }
382
383    /// Add one encoded video packet to the current pending segment.
384    /// `duration` is in track-timescale ticks. `is_keyframe` must be
385    /// true for IDR / sync-sample packets — the muxer doesn't peek
386    /// into the OBU stream to figure that out, and a wrong value
387    /// will produce a CMAF segment that doesn't decode (the spec
388    /// requires every segment to start with a sync sample).
389    pub fn add_packet(&mut self, payload: Vec<u8>, duration: u32, is_keyframe: bool) -> Result<()> {
390        match &mut self.nal_writer {
391            None => {
392                // AV1: capture the OBU sequence header once; store OBUs verbatim.
393                if self.config_obus.is_none() {
394                    self.config_obus = Some(extract_sequence_header(&payload).context(
395                        "extracting AV1 sequence header from first packet for av1C config record",
396                    )?);
397                }
398                self.pending.push(PendingVideoSample {
399                    payload,
400                    duration,
401                    is_keyframe,
402                });
403            }
404            Some(writer) => {
405                // H.264/H.265: split the Annex-B packet into access units (one
406                // per frame); each becomes a length-prefixed sample carrying its
407                // own inline SPS/PPS. Per-AU keyframe (IDR) detection comes from
408                // the bitstream, not the caller's flag. Each frame keeps the
409                // full per-frame `duration` (a packet may hold several frames).
410                for au in writer.push_packet(&payload) {
411                    self.pending.push(PendingVideoSample {
412                        payload: au.data,
413                        duration,
414                        is_keyframe: au.is_keyframe,
415                    });
416                }
417            }
418        }
419        Ok(())
420    }
421
422    /// Whether the muxer is ready to flush a segment that starts on a
423    /// sync sample. The first sample in `pending` must be a keyframe.
424    /// CMAF requires every segment to begin with a sync sample
425    /// (§7.3.2.1), so the orchestrator should ensure this invariant
426    /// before calling `flush_segment`.
427    pub fn first_pending_is_keyframe(&self) -> bool {
428        self.pending.first().is_some_and(|s| s.is_keyframe)
429    }
430
431    /// Total duration of pending samples in track-timescale ticks. The
432    /// orchestrator uses this to decide when a segment has reached
433    /// its target duration.
434    pub fn pending_duration_ticks(&self) -> u64 {
435        self.pending.iter().map(|s| s.duration as u64).sum()
436    }
437
438    /// View of segments already flushed to disk. Each entry's
439    /// `sequence_number` is the segment's 1-based index; `path` is
440    /// the on-disk location. The helper-task path
441    /// (`pipeline::cmaf::cmaf_transcode_rung_slice`) reads this
442    /// between `add_packet` calls to detect "did the last add
443    /// trigger an auto-flush?" — when `segments().len()` grows, the
444    /// last entry is the newly-flushed segment.
445    pub fn segments(&self) -> &[SegmentInfo] {
446        &self.segments
447    }
448
449    /// Drop every sample currently in the pending buffer without
450    /// writing them to disk. Used by the helper-task path when its
451    /// claim has been shrunk by an `attach_helper` and the encoder's
452    /// lookahead would otherwise produce a segment that conflicts
453    /// with whichever helper now owns that range.
454    ///
455    /// Specifically: when a primary's claim is shrunk from `[0..N)`
456    /// to `[0..K)`, the primary's encoder has already received
457    /// frames `K*KI..K*KI+lookahead` by the time the claim-shrink
458    /// is observed at the segment boundary. Those frames belong to
459    /// the helper that took `[K..N)`. Discarding the muxer pending
460    /// + dropping the encoder is the cleanest way to ensure no
461    /// stale segment file is written for the helper's territory.
462    pub fn clear_pending(&mut self) {
463        self.pending.clear();
464    }
465
466    /// Flush pending samples to a new media segment file. Writes
467    /// `init.mp4` first if it hasn't been written yet (the av1C config
468    /// record needs the first packet's sequence header). Returns the
469    /// segment's metadata and clears the pending buffer.
470    ///
471    /// No-op if `pending` is empty.
472    pub fn flush_segment(&mut self) -> Result<Option<SegmentInfo>> {
473        if self.pending.is_empty() {
474            return Ok(None);
475        }
476        if !self.first_pending_is_keyframe() {
477            anyhow::bail!(
478                "CMAF segment must start with a sync sample; first pending sample is not a keyframe \
479                 (segment_number={}, pending_count={})",
480                self.sequence_number + 1,
481                self.pending.len()
482            );
483        }
484        self.ensure_init_written()?;
485
486        self.sequence_number += 1;
487        let seq = self.sequence_number;
488        let samples_meta: Vec<CmafSample> = self
489            .pending
490            .iter()
491            .map(|s| CmafSample {
492                duration: s.duration,
493                size: s.payload.len() as u32,
494                flags: if s.is_keyframe {
495                    SampleFlags::keyframe()
496                } else {
497                    SampleFlags::delta_frame()
498                },
499            })
500            .collect();
501        let segment_duration: u64 = samples_meta.iter().map(|s| s.duration as u64).sum();
502
503        let mut moof = build_moof_video(seq, self.track_id, self.base_decode_time, &samples_meta);
504        moof.patch_default_no_gap();
505
506        let payload_total: u64 = self.pending.iter().map(|s| s.payload.len() as u64).sum();
507        let mdat_box_size: u64 = 8 + payload_total;
508        if mdat_box_size > u32::MAX as u64 {
509            // Above u32::MAX we'd need a `largesize` mdat (16-byte header).
510            // For 4-second segments at sane bitrates this is impossible; if
511            // we ever hit it, bail with a clear error rather than silently
512            // overflowing.
513            anyhow::bail!(
514                "CMAF media segment payload {} bytes exceeds 32-bit mdat size limit",
515                payload_total
516            );
517        }
518
519        let path = self.output_dir.join(format!("seg-{:05}.m4s", seq));
520        let file = File::create(&path)
521            .with_context(|| format!("creating CMAF segment file: {}", path.display()))?;
522        let mut writer = BufWriter::new(file);
523        writer.write_all(&moof.bytes).context("writing moof")?;
524        writer
525            .write_all(&(mdat_box_size as u32).to_be_bytes())
526            .context("writing mdat size")?;
527        writer.write_all(b"mdat").context("writing mdat type")?;
528        for sample in &self.pending {
529            writer
530                .write_all(&sample.payload)
531                .context("writing mdat payload")?;
532        }
533        writer.flush().context("flushing CMAF segment writer")?;
534        let byte_size = moof.bytes.len() as u64 + mdat_box_size;
535
536        self.base_decode_time += segment_duration;
537        self.pending.clear();
538
539        let info = SegmentInfo {
540            sequence_number: seq,
541            path,
542            byte_size,
543            duration_ticks: segment_duration,
544        };
545        self.segments.push(info.clone());
546        Ok(Some(info))
547    }
548
549    /// Finalize the muxer: ensures the init segment is on disk (covers
550    /// the edge case where add_packet was called but flush_segment
551    /// never was — e.g. an empty source), drops any non-flushed
552    /// pending samples (caller should have flushed them), and returns
553    /// the manifest.
554    pub fn finalize(mut self) -> Result<CmafTrackManifest> {
555        if !self.pending.is_empty() {
556            // Flush whatever's left. The caller should have done this
557            // explicitly; we cover them defensively.
558            self.flush_segment()?;
559        }
560        self.ensure_init_written()?;
561        Ok(CmafTrackManifest {
562            init_path: self.init_path,
563            segments: self.segments,
564            timescale: self.timescale,
565        })
566    }
567
568    fn ensure_init_written(&mut self) -> Result<()> {
569        if self.init_written {
570            return Ok(());
571        }
572        let init = match self.codec {
573            VideoCodec::Av1 => {
574                let config = self.config_obus.as_ref().ok_or_else(|| {
575                    anyhow::anyhow!(
576                        "cannot write CMAF video init segment: no AV1 sequence header has been \
577                         observed yet (call add_packet before flush_segment / finalize)"
578                    )
579                })?;
580                build_init_segment_video(
581                    self.width,
582                    self.height,
583                    self.timescale,
584                    config,
585                    &self.color_metadata,
586                )
587            }
588            VideoCodec::H264 => {
589                let w = self.nal_writer.as_ref().context("H.264 CMAF nal writer missing")?;
590                if !w.has_param_sets() {
591                    anyhow::bail!("cannot write CMAF H.264 init segment: no SPS/PPS observed yet");
592                }
593                let avcc = build_avcc(&w.sps, &w.pps);
594                // avc3 sample entry (in-band parameter sets); avc1 ftyp brand.
595                let entry = build_avc1(self.width, self.height, &avcc, &self.color_metadata, b"avc3");
596                build_init_segment_video_with_entry(
597                    self.width,
598                    self.height,
599                    self.timescale,
600                    &entry,
601                    b"avc1",
602                )
603            }
604            VideoCodec::H265 => {
605                let w = self.nal_writer.as_ref().context("H.265 CMAF nal writer missing")?;
606                if !w.has_param_sets() {
607                    anyhow::bail!(
608                        "cannot write CMAF H.265 init segment: no VPS/SPS/PPS observed yet"
609                    );
610                }
611                let hvcc = build_hvcc(&w.vps, &w.sps, &w.pps);
612                // hev1 sample entry (in-band parameter sets); hvc1 ftyp brand.
613                let entry = build_hvc1(self.width, self.height, &hvcc, &self.color_metadata, b"hev1");
614                build_init_segment_video_with_entry(
615                    self.width,
616                    self.height,
617                    self.timescale,
618                    &entry,
619                    b"hvc1",
620                )
621            }
622        };
623        let mut file = File::create(&self.init_path).with_context(|| {
624            format!(
625                "creating CMAF video init segment: {}",
626                self.init_path.display()
627            )
628        })?;
629        file.write_all(&init)
630            .context("writing CMAF video init segment bytes")?;
631        file.flush().context("flushing CMAF video init segment")?;
632        self.init_written = true;
633        Ok(())
634    }
635}
636
637// =====================================================================
638// CmafAudioMuxer
639// =====================================================================
640
641/// Stateful CMAF audio segmenter. Same model as the video muxer but
642/// simpler — every audio sample is independently decodable, so there's
643/// no first-sample-flags / sync-boundary requirement.
644pub struct CmafAudioMuxer {
645    output_dir: PathBuf,
646    info: AudioInfo,
647    track_id: u32,
648    init_path: PathBuf,
649    init_written: bool,
650    sequence_number: u32,
651    base_decode_time: u64,
652    pending: Vec<PendingAudioSample>,
653    segments: Vec<SegmentInfo>,
654}
655
656impl CmafAudioMuxer {
657    pub fn new(output_dir: impl AsRef<Path>, info: AudioInfo) -> Result<Self> {
658        let output_dir = output_dir.as_ref().to_path_buf();
659        fs::create_dir_all(&output_dir)
660            .with_context(|| format!("creating CMAF audio output dir: {}", output_dir.display()))?;
661        let init_path = output_dir.join("init.mp4");
662        Ok(Self {
663            output_dir,
664            info,
665            track_id: 1,
666            init_path,
667            init_written: false,
668            sequence_number: 0,
669            base_decode_time: 0,
670            pending: Vec::new(),
671            segments: Vec::new(),
672        })
673    }
674
675    pub fn add_packet(&mut self, payload: Vec<u8>, duration: u32) -> Result<()> {
676        self.pending.push(PendingAudioSample { payload, duration });
677        Ok(())
678    }
679
680    pub fn pending_duration_ticks(&self) -> u64 {
681        self.pending.iter().map(|s| s.duration as u64).sum()
682    }
683
684    pub fn flush_segment(&mut self) -> Result<Option<SegmentInfo>> {
685        if self.pending.is_empty() {
686            return Ok(None);
687        }
688        self.ensure_init_written()?;
689
690        self.sequence_number += 1;
691        let seq = self.sequence_number;
692        let samples_meta: Vec<CmafSample> = self
693            .pending
694            .iter()
695            .map(|s| CmafSample {
696                duration: s.duration,
697                size: s.payload.len() as u32,
698                flags: SampleFlags::keyframe(),
699            })
700            .collect();
701        let segment_duration: u64 = samples_meta.iter().map(|s| s.duration as u64).sum();
702
703        let mut moof = build_moof_audio(seq, self.track_id, self.base_decode_time, &samples_meta);
704        moof.patch_default_no_gap();
705
706        let payload_total: u64 = self.pending.iter().map(|s| s.payload.len() as u64).sum();
707        let mdat_box_size: u64 = 8 + payload_total;
708        if mdat_box_size > u32::MAX as u64 {
709            anyhow::bail!(
710                "CMAF audio media segment payload {} bytes exceeds 32-bit mdat size limit",
711                payload_total
712            );
713        }
714
715        let path = self.output_dir.join(format!("seg-{:05}.m4s", seq));
716        let file = File::create(&path)
717            .with_context(|| format!("creating CMAF audio segment file: {}", path.display()))?;
718        let mut writer = BufWriter::new(file);
719        writer
720            .write_all(&moof.bytes)
721            .context("writing audio moof")?;
722        writer
723            .write_all(&(mdat_box_size as u32).to_be_bytes())
724            .context("writing audio mdat size")?;
725        writer
726            .write_all(b"mdat")
727            .context("writing audio mdat type")?;
728        for sample in &self.pending {
729            writer
730                .write_all(&sample.payload)
731                .context("writing audio mdat payload")?;
732        }
733        writer
734            .flush()
735            .context("flushing CMAF audio segment writer")?;
736        let byte_size = moof.bytes.len() as u64 + mdat_box_size;
737
738        self.base_decode_time += segment_duration;
739        self.pending.clear();
740
741        let info = SegmentInfo {
742            sequence_number: seq,
743            path,
744            byte_size,
745            duration_ticks: segment_duration,
746        };
747        self.segments.push(info.clone());
748        Ok(Some(info))
749    }
750
751    pub fn finalize(mut self) -> Result<CmafTrackManifest> {
752        if !self.pending.is_empty() {
753            self.flush_segment()?;
754        }
755        self.ensure_init_written()?;
756        let timescale = self.info.timescale;
757        Ok(CmafTrackManifest {
758            init_path: self.init_path,
759            segments: self.segments,
760            timescale,
761        })
762    }
763
764    fn ensure_init_written(&mut self) -> Result<()> {
765        if self.init_written {
766            return Ok(());
767        }
768        let init = build_init_segment_audio(&self.info);
769        let mut file = File::create(&self.init_path).with_context(|| {
770            format!(
771                "creating CMAF audio init segment: {}",
772                self.init_path.display()
773            )
774        })?;
775        file.write_all(&init)
776            .context("writing CMAF audio init segment bytes")?;
777        file.flush().context("flushing CMAF audio init segment")?;
778        self.init_written = true;
779        Ok(())
780    }
781}