container/cmaf/mod.rs
1//! Fragmented MP4 / CMAF box writers.
2//!
3//! Produces ISO/IEC 14496-12 §8.8 movie-fragment boxes (`moof` / `mfhd` /
4//! `traf` / `tfhd` / `tfdt` / `trun`) and the corresponding `mvex` /
5//! `mehd` / `trex` declarations that go inside a CMAF init segment's
6//! `moov`. CMAF (ISO/IEC 23000-19) constrains the general 14496-12 model:
7//! exactly one track per fragment (one `traf` per `moof`), exactly one
8//! track per init segment, and a small set of mandatory boxes.
9//!
10//! This module is the box-level primitive layer. Higher-level callers
11//! (`init_segment_video`, `media_segment_video`, etc. in subsequent
12//! commits) compose these into init + media segments. The split lets us
13//! unit-test each box's byte layout against the spec without having to
14//! drive a full encode + segment pipeline.
15//!
16//! Spec citations are given by section number in the relevant box's doc
17//! comment so future readers can cross-check against the standard.
18//!
19//! # CMAF brand
20//!
21//! Init segments for video tracks declare the `cmfc` brand (CMAF
22//! constraints, per CMAF §7.3.4). Audio tracks use `cmfa`. Both brands
23//! coexist in `compatible_brands` alongside the existing `iso6` / `mp42`
24//! / `av01` brands so non-CMAF-aware tools that consume the same boxes
25//! (e.g. an old ffprobe) can still demux them.
26//!
27//! # Sample-flags packing
28//!
29//! `default_sample_flags` (in `trex` / `tfhd`) and `first_sample_flags`
30//! / per-sample flags (in `trun`) are packed per ISO/IEC 14496-12
31//! §8.8.3.1. The 32 bits are laid out:
32//!
33//! ```text
34//! reserved[4] = 0
35//! is_leading[2] = 0
36//! sample_depends_on[2]
37//! sample_is_depended_on[2]
38//! sample_has_redundancy[2]
39//! sample_padding_value[3] = 0
40//! sample_is_non_sync_sample[1]
41//! sample_degradation_priority[16] = 0
42//! ```
43//!
44//! For AV1 / AAC the meaningful values are `sample_depends_on = 1`
45//! (this sample depends on others — i.e. P / B / non-IDR) or `2`
46//! (independent — i.e. IDR / sync), and `sample_is_non_sync_sample = 1`
47//! for non-key frames, `0` for keyframes. The helper
48//! [`SampleFlags::pack`] handles this; callers shouldn't compose the
49//! u32 by hand.
50
51use anyhow::{Context, Result};
52use codec::frame::{ColorMetadata, VideoCodec};
53use std::fs::{self, File};
54use std::io::{BufWriter, Write};
55use std::path::{Path, PathBuf};
56
57use crate::AudioInfo;
58use crate::mux::{build_avc1, build_avcc, build_hvc1, build_hvcc, extract_sequence_header};
59use crate::nal_mux::{NalMuxCodec, NalSampleWriter};
60
61mod fragment;
62mod init;
63#[cfg(test)]
64mod tests;
65
66pub use fragment::*;
67pub use init::*;
68
69// =====================================================================
70// Shared types (re-used by fragment.rs, init.rs, and the muxers here)
71// =====================================================================
72
/// Four-character brand codes that the init-segment writers list in
/// `ftyp.compatible_brands`.
pub mod brand {
    /// Brand declared for CMAF video tracks (CMAF §7.3.4).
    pub const CMFC: &[u8; 4] = b"cmfc";
    /// Brand declared for CMAF audio tracks (CMAF §7.3.5).
    pub const CMFA: &[u8; 4] = b"cmfa";
}
80
/// Which kind of media a CMAF track carries.
///
/// CMAF places exactly one track in each init segment / fragment, so this
/// single discriminator is all higher-level orchestration needs to choose a
/// codec dispatch path. The init / segment writers expose type-specific
/// entry points instead of taking this enum, which is why it carries
/// `#[allow(dead_code)]` until the pipeline orchestrator (Phase 4) threads
/// it through.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[allow(dead_code)]
pub enum CmafTrackKind {
    /// The track carries video samples.
    Video,
    /// The track carries audio samples.
    Audio,
}
93
/// Sample flags as packed into `default_sample_flags`, `first_sample_flags`
/// and the per-sample `sample_flags` of `trun` (ISO/IEC 14496-12 §8.8.3.1).
///
/// Only the sync / non-sync distinction is modelled. [`SampleFlags::pack`]
/// derives `sample_depends_on` and `sample_is_non_sync_sample` from
/// `is_sync` and leaves every other field of the 32-bit word at zero.
/// Construct values with [`SampleFlags::keyframe`] or
/// [`SampleFlags::delta_frame`].
#[derive(Debug, Clone, Copy)]
pub struct SampleFlags {
    /// `true` for a sync sample (keyframe / IDR), `false` otherwise.
    ///
    /// Note this is the *inverse* of the wire-level
    /// `sample_is_non_sync_sample` bit.
    pub is_sync: bool,
}

impl SampleFlags {
    /// `sample_depends_on = 2` (bits 24-25): the sample does not depend on
    /// any other sample.
    const DEPENDS_ON_NONE: u32 = 2 << 24;
    /// `sample_depends_on = 1` (bits 24-25): the sample depends on others.
    const DEPENDS_ON_OTHERS: u32 = 1 << 24;
    /// `sample_is_non_sync_sample = 1` (bit 16).
    const NON_SYNC: u32 = 1 << 16;

    /// Pack into the wire-format `u32`.
    ///
    /// * sync sample → `sample_depends_on = 2`, `sample_is_non_sync_sample = 0`
    ///   (`0x0200_0000`);
    /// * non-sync sample → `sample_depends_on = 1`,
    ///   `sample_is_non_sync_sample = 1` (`0x0101_0000`).
    pub const fn pack(self) -> u32 {
        if self.is_sync {
            Self::DEPENDS_ON_NONE
        } else {
            Self::DEPENDS_ON_OTHERS | Self::NON_SYNC
        }
    }

    /// Flags for a sync sample (keyframe / IDR).
    pub const fn keyframe() -> Self {
        Self { is_sync: true }
    }

    /// Flags for a non-sync sample (one that depends on earlier samples).
    pub const fn delta_frame() -> Self {
        Self { is_sync: false }
    }
}
130
131/// Per-sample fields written into `trun`. Each entry produces one row
132/// of (duration, size, flags) in the fragment's sample table.
133#[derive(Debug, Clone, Copy)]
134pub struct CmafSample {
135 /// Sample duration in track timescale ticks.
136 pub duration: u32,
137 /// Encoded sample size in bytes.
138 pub size: u32,
139 /// Sample flags (sync / non-sync). The very FIRST sample in a fragment
140 /// uses `first_sample_flags` instead — see `build_trun_video`.
141 pub flags: SampleFlags,
142}
143
144// =====================================================================
145// Stateful per-rendition segmenter types
146// =====================================================================
147
/// Metadata describing one media segment written to disk, as returned by
/// [`CmafVideoMuxer::flush_segment`] / [`CmafAudioMuxer::flush_segment`].
///
/// These records feed the HLS playlist writer (Phase 3) and the
/// segment-alignment validator (Phase 5).
#[derive(Clone, Debug)]
pub struct SegmentInfo {
    /// Segment number within the track; increases by one per flushed
    /// segment and matches the `NNNNN` in the file name.
    pub sequence_number: u32,
    /// On-disk location of the `seg-NNNNN.m4s` file.
    pub path: PathBuf,
    /// Size of the whole file in bytes: `moof` + `mdat` header + payload.
    pub byte_size: u64,
    /// Sum of the per-sample durations, in track-timescale ticks. Dividing
    /// by the timescale gives the value for the HLS `EXTINF` line.
    pub duration_ticks: u64,
}
164
165/// Output of a finalized track muxer: where the init segment lives,
166/// the ordered list of media segments, and the timescale needed to
167/// convert `duration_ticks` to seconds.
168#[derive(Debug, Clone)]
169pub struct CmafTrackManifest {
170 pub init_path: PathBuf,
171 pub segments: Vec<SegmentInfo>,
172 pub timescale: u32,
173}
174
175impl CmafTrackManifest {
176 /// Total duration across all segments, in seconds.
177 pub fn duration_seconds(&self) -> f64 {
178 let total_ticks: u64 = self.segments.iter().map(|s| s.duration_ticks).sum();
179 total_ticks as f64 / self.timescale as f64
180 }
181}
182
/// A video sample buffered by the muxer until the next segment flush.
struct PendingVideoSample {
    /// Encoded sample bytes, written verbatim into the segment's `mdat`.
    payload: Vec<u8>,
    /// Sample duration in track-timescale ticks.
    duration: u32,
    /// Whether the sample is a sync sample (keyframe / IDR).
    is_keyframe: bool,
}
189
/// An audio sample buffered by the muxer until the next segment flush.
struct PendingAudioSample {
    /// Encoded sample bytes, written verbatim into the segment's `mdat`.
    payload: Vec<u8>,
    /// Sample duration in track-timescale ticks.
    duration: u32,
}
195
196// =====================================================================
197// CmafVideoMuxer
198// =====================================================================
199
/// Optional construction parameters for [`CmafVideoMuxer`].
///
/// The defaults reproduce the behaviour of the 5-argument `new()`: the
/// muxer writes `init.mp4`, numbers segments from 1, and starts decode
/// time at 0.
///
/// Non-default values exist for the multi-GPU helper-task path (see the
/// `pipeline::cmaf` helper variant). When several muxers share one
/// per-rung output directory, each helper starts at a later
/// `first_segment_index` with the matching decode-time offset, and only
/// the primary writes `init.mp4`.
#[derive(Clone, Debug)]
pub struct CmafVideoMuxerOptions {
    /// 1-based index of the segment produced by the muxer's first
    /// `flush_segment()`; the file is named
    /// `seg-{first_segment_index:05}.m4s`. Defaults to `1`.
    pub first_segment_index: u32,
    /// Decode time, in track-timescale ticks, of the first sample of the
    /// muxer's first segment. Should equal
    /// `(first_segment_index - 1) * segment_duration_ticks` so `tfdt`
    /// matches what the primary would have produced for that segment
    /// index. Defaults to `0`.
    pub first_segment_base_decode_time: u64,
    /// When `false`, neither `flush_segment()` nor `finalize()` writes
    /// `init.mp4`. Use when a sibling muxer (typically the primary) owns
    /// the init segment and helpers must not race against it. Defaults to
    /// `true`.
    pub write_init_segment: bool,
}

impl Default for CmafVideoMuxerOptions {
    /// Options for a stand-alone (primary) muxer.
    fn default() -> Self {
        let first_segment_index = 1;
        let first_segment_base_decode_time = 0;
        let write_init_segment = true;
        Self {
            first_segment_index,
            first_segment_base_decode_time,
            write_init_segment,
        }
    }
}
237
238/// Stateful CMAF video segmenter for one AV1 rendition.
239///
240/// Driven by the pipeline:
241/// 1. Construct with rendition dimensions + output dir + timescale.
242/// 2. Call `add_packet` for each encoded packet from the encoder.
243/// The first packet's OBU stream MUST contain a sequence header;
244/// the muxer extracts it and uses it for `av1C` in the init.mp4
245/// (written lazily on the first `flush_segment` call).
246/// 3. Call `flush_segment` whenever a CMAF fragment boundary is
247/// reached (the orchestrator decides when based on accumulated
248/// duration + the segment_duration knob).
249/// 4. After the last packet is added and flushed, call `finalize`
250/// to consume the muxer and get the [`CmafTrackManifest`].
251///
252/// Segment files are named `seg-00001.m4s`, `seg-00002.m4s`, ...
253/// in the output dir.
254pub struct CmafVideoMuxer {
255 output_dir: PathBuf,
256 width: u32,
257 height: u32,
258 timescale: u32,
259 color_metadata: ColorMetadata,
260 track_id: u32,
261 /// Output codec. `Av1` stores OBUs verbatim + builds `av01`/`av1C`;
262 /// `H264`/`H265` repackage Annex-B → length-prefixed via `nal_writer` and
263 /// build `avc3`/`hev1` init segments with inline parameter sets.
264 codec: VideoCodec,
265 /// AV1 only: the OBU sequence header captured from the first packet.
266 config_obus: Option<Vec<u8>>,
267 /// H.264/H.265 only: Annex-B → length-prefixed repackaging + SPS/PPS(/VPS)
268 /// capture (inline mode — each segment self-describes; `avc3`/`hev1`).
269 nal_writer: Option<NalSampleWriter>,
270 init_path: PathBuf,
271 init_written: bool,
272 sequence_number: u32,
273 base_decode_time: u64,
274 pending: Vec<PendingVideoSample>,
275 segments: Vec<SegmentInfo>,
276}
277
278impl CmafVideoMuxer {
279 /// Construct a new video muxer that writes init.mp4 + segments to
280 /// `output_dir`. Creates the directory if it doesn't exist.
281 ///
282 /// Equivalent to `new_with_options(..., CmafVideoMuxerOptions::default())`.
283 pub fn new(
284 output_dir: impl AsRef<Path>,
285 width: u32,
286 height: u32,
287 timescale: u32,
288 color_metadata: ColorMetadata,
289 ) -> Result<Self> {
290 Self::new_with_options(
291 output_dir,
292 width,
293 height,
294 timescale,
295 color_metadata,
296 CmafVideoMuxerOptions::default(),
297 )
298 }
299
300 /// Construct a muxer with non-default options. See
301 /// [`CmafVideoMuxerOptions`].
302 ///
303 /// The helper-task path uses this to attach to an in-progress rung:
304 /// the helper's muxer starts numbering segments at the helper's
305 /// claim range start, advances `tfdt` to the corresponding decode
306 /// time, and skips the init segment write that the primary owns.
307 pub fn new_with_options(
308 output_dir: impl AsRef<Path>,
309 width: u32,
310 height: u32,
311 timescale: u32,
312 color_metadata: ColorMetadata,
313 options: CmafVideoMuxerOptions,
314 ) -> Result<Self> {
315 Self::new_with_codec_options(
316 output_dir,
317 width,
318 height,
319 timescale,
320 color_metadata,
321 VideoCodec::Av1,
322 options,
323 )
324 }
325
326 /// Codec-aware constructor. `Av1` matches the legacy behaviour; `H264` /
327 /// `H265` build `avc3` / `hev1` init segments and repackage the encoder's
328 /// Annex-B packets into length-prefixed samples with inline parameter sets
329 /// (each segment self-describes — robust across the multi-GPU helper path).
330 pub fn new_with_codec_options(
331 output_dir: impl AsRef<Path>,
332 width: u32,
333 height: u32,
334 timescale: u32,
335 color_metadata: ColorMetadata,
336 codec: VideoCodec,
337 options: CmafVideoMuxerOptions,
338 ) -> Result<Self> {
339 assert!(
340 options.first_segment_index >= 1,
341 "first_segment_index is 1-based; got {}",
342 options.first_segment_index,
343 );
344 let output_dir = output_dir.as_ref().to_path_buf();
345 fs::create_dir_all(&output_dir)
346 .with_context(|| format!("creating CMAF video output dir: {}", output_dir.display()))?;
347 let init_path = output_dir.join("init.mp4");
348 // H.264/H.265 use inline parameter sets (avc3/hev1) so each segment —
349 // and each independently-encoded multi-GPU chunk — self-describes.
350 let nal_writer = match codec {
351 VideoCodec::Av1 => None,
352 VideoCodec::H264 => Some(NalSampleWriter::new_inline(NalMuxCodec::H264)),
353 VideoCodec::H265 => Some(NalSampleWriter::new_inline(NalMuxCodec::H265)),
354 };
355 Ok(Self {
356 output_dir,
357 width,
358 height,
359 timescale,
360 color_metadata,
361 track_id: 1,
362 codec,
363 config_obus: None,
364 nal_writer,
365 init_path,
366 // When write_init_segment is false, mark init as already
367 // written so `ensure_init_written` is a no-op. The primary
368 // is expected to have written (or will write) init.mp4
369 // separately.
370 init_written: !options.write_init_segment,
371 // `flush_segment` pre-increments `sequence_number` before
372 // writing, so the on-disk segment number equals
373 // `sequence_number` AFTER the increment. To produce
374 // `seg-{first_segment_index:05}.m4s` as the first output,
375 // start at `first_segment_index - 1`.
376 sequence_number: options.first_segment_index - 1,
377 base_decode_time: options.first_segment_base_decode_time,
378 pending: Vec::new(),
379 segments: Vec::new(),
380 })
381 }
382
383 /// Add one encoded video packet to the current pending segment.
384 /// `duration` is in track-timescale ticks. `is_keyframe` must be
385 /// true for IDR / sync-sample packets — the muxer doesn't peek
386 /// into the OBU stream to figure that out, and a wrong value
387 /// will produce a CMAF segment that doesn't decode (the spec
388 /// requires every segment to start with a sync sample).
389 pub fn add_packet(&mut self, payload: Vec<u8>, duration: u32, is_keyframe: bool) -> Result<()> {
390 match &mut self.nal_writer {
391 None => {
392 // AV1: capture the OBU sequence header once; store OBUs verbatim.
393 if self.config_obus.is_none() {
394 self.config_obus = Some(extract_sequence_header(&payload).context(
395 "extracting AV1 sequence header from first packet for av1C config record",
396 )?);
397 }
398 self.pending.push(PendingVideoSample {
399 payload,
400 duration,
401 is_keyframe,
402 });
403 }
404 Some(writer) => {
405 // H.264/H.265: split the Annex-B packet into access units (one
406 // per frame); each becomes a length-prefixed sample carrying its
407 // own inline SPS/PPS. Per-AU keyframe (IDR) detection comes from
408 // the bitstream, not the caller's flag. Each frame keeps the
409 // full per-frame `duration` (a packet may hold several frames).
410 for au in writer.push_packet(&payload) {
411 self.pending.push(PendingVideoSample {
412 payload: au.data,
413 duration,
414 is_keyframe: au.is_keyframe,
415 });
416 }
417 }
418 }
419 Ok(())
420 }
421
422 /// Whether the muxer is ready to flush a segment that starts on a
423 /// sync sample. The first sample in `pending` must be a keyframe.
424 /// CMAF requires every segment to begin with a sync sample
425 /// (§7.3.2.1), so the orchestrator should ensure this invariant
426 /// before calling `flush_segment`.
427 pub fn first_pending_is_keyframe(&self) -> bool {
428 self.pending.first().is_some_and(|s| s.is_keyframe)
429 }
430
431 /// Total duration of pending samples in track-timescale ticks. The
432 /// orchestrator uses this to decide when a segment has reached
433 /// its target duration.
434 pub fn pending_duration_ticks(&self) -> u64 {
435 self.pending.iter().map(|s| s.duration as u64).sum()
436 }
437
438 /// View of segments already flushed to disk. Each entry's
439 /// `sequence_number` is the segment's 1-based index; `path` is
440 /// the on-disk location. The helper-task path
441 /// (`pipeline::cmaf::cmaf_transcode_rung_slice`) reads this
442 /// between `add_packet` calls to detect "did the last add
443 /// trigger an auto-flush?" — when `segments().len()` grows, the
444 /// last entry is the newly-flushed segment.
445 pub fn segments(&self) -> &[SegmentInfo] {
446 &self.segments
447 }
448
449 /// Drop every sample currently in the pending buffer without
450 /// writing them to disk. Used by the helper-task path when its
451 /// claim has been shrunk by an `attach_helper` and the encoder's
452 /// lookahead would otherwise produce a segment that conflicts
453 /// with whichever helper now owns that range.
454 ///
455 /// Specifically: when a primary's claim is shrunk from `[0..N)`
456 /// to `[0..K)`, the primary's encoder has already received
457 /// frames `K*KI..K*KI+lookahead` by the time the claim-shrink
458 /// is observed at the segment boundary. Those frames belong to
459 /// the helper that took `[K..N)`. Discarding the muxer pending
460 /// + dropping the encoder is the cleanest way to ensure no
461 /// stale segment file is written for the helper's territory.
462 pub fn clear_pending(&mut self) {
463 self.pending.clear();
464 }
465
466 /// Flush pending samples to a new media segment file. Writes
467 /// `init.mp4` first if it hasn't been written yet (the av1C config
468 /// record needs the first packet's sequence header). Returns the
469 /// segment's metadata and clears the pending buffer.
470 ///
471 /// No-op if `pending` is empty.
472 pub fn flush_segment(&mut self) -> Result<Option<SegmentInfo>> {
473 if self.pending.is_empty() {
474 return Ok(None);
475 }
476 if !self.first_pending_is_keyframe() {
477 anyhow::bail!(
478 "CMAF segment must start with a sync sample; first pending sample is not a keyframe \
479 (segment_number={}, pending_count={})",
480 self.sequence_number + 1,
481 self.pending.len()
482 );
483 }
484 self.ensure_init_written()?;
485
486 self.sequence_number += 1;
487 let seq = self.sequence_number;
488 let samples_meta: Vec<CmafSample> = self
489 .pending
490 .iter()
491 .map(|s| CmafSample {
492 duration: s.duration,
493 size: s.payload.len() as u32,
494 flags: if s.is_keyframe {
495 SampleFlags::keyframe()
496 } else {
497 SampleFlags::delta_frame()
498 },
499 })
500 .collect();
501 let segment_duration: u64 = samples_meta.iter().map(|s| s.duration as u64).sum();
502
503 let mut moof = build_moof_video(seq, self.track_id, self.base_decode_time, &samples_meta);
504 moof.patch_default_no_gap();
505
506 let payload_total: u64 = self.pending.iter().map(|s| s.payload.len() as u64).sum();
507 let mdat_box_size: u64 = 8 + payload_total;
508 if mdat_box_size > u32::MAX as u64 {
509 // Above u32::MAX we'd need a `largesize` mdat (16-byte header).
510 // For 4-second segments at sane bitrates this is impossible; if
511 // we ever hit it, bail with a clear error rather than silently
512 // overflowing.
513 anyhow::bail!(
514 "CMAF media segment payload {} bytes exceeds 32-bit mdat size limit",
515 payload_total
516 );
517 }
518
519 let path = self.output_dir.join(format!("seg-{:05}.m4s", seq));
520 let file = File::create(&path)
521 .with_context(|| format!("creating CMAF segment file: {}", path.display()))?;
522 let mut writer = BufWriter::new(file);
523 writer.write_all(&moof.bytes).context("writing moof")?;
524 writer
525 .write_all(&(mdat_box_size as u32).to_be_bytes())
526 .context("writing mdat size")?;
527 writer.write_all(b"mdat").context("writing mdat type")?;
528 for sample in &self.pending {
529 writer
530 .write_all(&sample.payload)
531 .context("writing mdat payload")?;
532 }
533 writer.flush().context("flushing CMAF segment writer")?;
534 let byte_size = moof.bytes.len() as u64 + mdat_box_size;
535
536 self.base_decode_time += segment_duration;
537 self.pending.clear();
538
539 let info = SegmentInfo {
540 sequence_number: seq,
541 path,
542 byte_size,
543 duration_ticks: segment_duration,
544 };
545 self.segments.push(info.clone());
546 Ok(Some(info))
547 }
548
549 /// Finalize the muxer: ensures the init segment is on disk (covers
550 /// the edge case where add_packet was called but flush_segment
551 /// never was — e.g. an empty source), drops any non-flushed
552 /// pending samples (caller should have flushed them), and returns
553 /// the manifest.
554 pub fn finalize(mut self) -> Result<CmafTrackManifest> {
555 if !self.pending.is_empty() {
556 // Flush whatever's left. The caller should have done this
557 // explicitly; we cover them defensively.
558 self.flush_segment()?;
559 }
560 self.ensure_init_written()?;
561 Ok(CmafTrackManifest {
562 init_path: self.init_path,
563 segments: self.segments,
564 timescale: self.timescale,
565 })
566 }
567
568 fn ensure_init_written(&mut self) -> Result<()> {
569 if self.init_written {
570 return Ok(());
571 }
572 let init = match self.codec {
573 VideoCodec::Av1 => {
574 let config = self.config_obus.as_ref().ok_or_else(|| {
575 anyhow::anyhow!(
576 "cannot write CMAF video init segment: no AV1 sequence header has been \
577 observed yet (call add_packet before flush_segment / finalize)"
578 )
579 })?;
580 build_init_segment_video(
581 self.width,
582 self.height,
583 self.timescale,
584 config,
585 &self.color_metadata,
586 )
587 }
588 VideoCodec::H264 => {
589 let w = self.nal_writer.as_ref().context("H.264 CMAF nal writer missing")?;
590 if !w.has_param_sets() {
591 anyhow::bail!("cannot write CMAF H.264 init segment: no SPS/PPS observed yet");
592 }
593 let avcc = build_avcc(&w.sps, &w.pps);
594 // avc3 sample entry (in-band parameter sets); avc1 ftyp brand.
595 let entry = build_avc1(self.width, self.height, &avcc, &self.color_metadata, b"avc3");
596 build_init_segment_video_with_entry(
597 self.width,
598 self.height,
599 self.timescale,
600 &entry,
601 b"avc1",
602 )
603 }
604 VideoCodec::H265 => {
605 let w = self.nal_writer.as_ref().context("H.265 CMAF nal writer missing")?;
606 if !w.has_param_sets() {
607 anyhow::bail!(
608 "cannot write CMAF H.265 init segment: no VPS/SPS/PPS observed yet"
609 );
610 }
611 let hvcc = build_hvcc(&w.vps, &w.sps, &w.pps);
612 // hev1 sample entry (in-band parameter sets); hvc1 ftyp brand.
613 let entry = build_hvc1(self.width, self.height, &hvcc, &self.color_metadata, b"hev1");
614 build_init_segment_video_with_entry(
615 self.width,
616 self.height,
617 self.timescale,
618 &entry,
619 b"hvc1",
620 )
621 }
622 };
623 let mut file = File::create(&self.init_path).with_context(|| {
624 format!(
625 "creating CMAF video init segment: {}",
626 self.init_path.display()
627 )
628 })?;
629 file.write_all(&init)
630 .context("writing CMAF video init segment bytes")?;
631 file.flush().context("flushing CMAF video init segment")?;
632 self.init_written = true;
633 Ok(())
634 }
635}
636
637// =====================================================================
638// CmafAudioMuxer
639// =====================================================================
640
641/// Stateful CMAF audio segmenter. Same model as the video muxer but
642/// simpler — every audio sample is independently decodable, so there's
643/// no first-sample-flags / sync-boundary requirement.
644pub struct CmafAudioMuxer {
645 output_dir: PathBuf,
646 info: AudioInfo,
647 track_id: u32,
648 init_path: PathBuf,
649 init_written: bool,
650 sequence_number: u32,
651 base_decode_time: u64,
652 pending: Vec<PendingAudioSample>,
653 segments: Vec<SegmentInfo>,
654}
655
656impl CmafAudioMuxer {
657 pub fn new(output_dir: impl AsRef<Path>, info: AudioInfo) -> Result<Self> {
658 let output_dir = output_dir.as_ref().to_path_buf();
659 fs::create_dir_all(&output_dir)
660 .with_context(|| format!("creating CMAF audio output dir: {}", output_dir.display()))?;
661 let init_path = output_dir.join("init.mp4");
662 Ok(Self {
663 output_dir,
664 info,
665 track_id: 1,
666 init_path,
667 init_written: false,
668 sequence_number: 0,
669 base_decode_time: 0,
670 pending: Vec::new(),
671 segments: Vec::new(),
672 })
673 }
674
675 pub fn add_packet(&mut self, payload: Vec<u8>, duration: u32) -> Result<()> {
676 self.pending.push(PendingAudioSample { payload, duration });
677 Ok(())
678 }
679
680 pub fn pending_duration_ticks(&self) -> u64 {
681 self.pending.iter().map(|s| s.duration as u64).sum()
682 }
683
684 pub fn flush_segment(&mut self) -> Result<Option<SegmentInfo>> {
685 if self.pending.is_empty() {
686 return Ok(None);
687 }
688 self.ensure_init_written()?;
689
690 self.sequence_number += 1;
691 let seq = self.sequence_number;
692 let samples_meta: Vec<CmafSample> = self
693 .pending
694 .iter()
695 .map(|s| CmafSample {
696 duration: s.duration,
697 size: s.payload.len() as u32,
698 flags: SampleFlags::keyframe(),
699 })
700 .collect();
701 let segment_duration: u64 = samples_meta.iter().map(|s| s.duration as u64).sum();
702
703 let mut moof = build_moof_audio(seq, self.track_id, self.base_decode_time, &samples_meta);
704 moof.patch_default_no_gap();
705
706 let payload_total: u64 = self.pending.iter().map(|s| s.payload.len() as u64).sum();
707 let mdat_box_size: u64 = 8 + payload_total;
708 if mdat_box_size > u32::MAX as u64 {
709 anyhow::bail!(
710 "CMAF audio media segment payload {} bytes exceeds 32-bit mdat size limit",
711 payload_total
712 );
713 }
714
715 let path = self.output_dir.join(format!("seg-{:05}.m4s", seq));
716 let file = File::create(&path)
717 .with_context(|| format!("creating CMAF audio segment file: {}", path.display()))?;
718 let mut writer = BufWriter::new(file);
719 writer
720 .write_all(&moof.bytes)
721 .context("writing audio moof")?;
722 writer
723 .write_all(&(mdat_box_size as u32).to_be_bytes())
724 .context("writing audio mdat size")?;
725 writer
726 .write_all(b"mdat")
727 .context("writing audio mdat type")?;
728 for sample in &self.pending {
729 writer
730 .write_all(&sample.payload)
731 .context("writing audio mdat payload")?;
732 }
733 writer
734 .flush()
735 .context("flushing CMAF audio segment writer")?;
736 let byte_size = moof.bytes.len() as u64 + mdat_box_size;
737
738 self.base_decode_time += segment_duration;
739 self.pending.clear();
740
741 let info = SegmentInfo {
742 sequence_number: seq,
743 path,
744 byte_size,
745 duration_ticks: segment_duration,
746 };
747 self.segments.push(info.clone());
748 Ok(Some(info))
749 }
750
751 pub fn finalize(mut self) -> Result<CmafTrackManifest> {
752 if !self.pending.is_empty() {
753 self.flush_segment()?;
754 }
755 self.ensure_init_written()?;
756 let timescale = self.info.timescale;
757 Ok(CmafTrackManifest {
758 init_path: self.init_path,
759 segments: self.segments,
760 timescale,
761 })
762 }
763
764 fn ensure_init_written(&mut self) -> Result<()> {
765 if self.init_written {
766 return Ok(());
767 }
768 let init = build_init_segment_audio(&self.info);
769 let mut file = File::create(&self.init_path).with_context(|| {
770 format!(
771 "creating CMAF audio init segment: {}",
772 self.init_path.display()
773 )
774 })?;
775 file.write_all(&init)
776 .context("writing CMAF audio init segment bytes")?;
777 file.flush().context("flushing CMAF audio init segment")?;
778 self.init_written = true;
779 Ok(())
780 }
781}