Skip to main content

codec/
frame.rs

1use bytes::Bytes;
2
/// Video codec the encoder and muxer are asked to produce.
///
/// AV1 is the project default because it is royalty-clean. H.264 and
/// H.265 remain selectable for compatibility with legacy players, but
/// they carry patent-licensing obligations — see the docs.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
pub enum VideoCodec {
    #[default]
    Av1,
    H264,
    H265,
}

impl VideoCodec {
    /// Single lookup table backing the public accessors, as
    /// `(label, sample-entry fourcc)`. Keeping both strings in one
    /// `match` means a new variant cannot be given one without the other.
    const fn names(self) -> (&'static str, &'static str) {
        match self {
            Self::Av1 => ("av1", "av01"),
            Self::H264 => ("h264", "avc1"),
            Self::H265 => ("h265", "hvc1"),
        }
    }

    /// Returns the short lowercase codec name: `"av1"`, `"h264"` or
    /// `"h265"`.
    pub fn label(self) -> &'static str {
        let (label, _fourcc) = self.names();
        label
    }

    /// Returns the fourcc of the ISOBMFF visual sample entry used for
    /// this codec: `av01`, `avc1` or `hvc1`.
    pub fn sample_entry_fourcc(self) -> &'static str {
        let (_label, fourcc) = self.names();
        fourcc
    }
}
33
/// Raw frame layouts this module can describe. Variant names follow the
/// ffmpeg pixel-format names returned by [`PixelFormat::as_ffmpeg_str`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PixelFormat {
    Yuv420p,
    Yuv420p10le,
    Yuv420p12le,
    Yuv422p,
    Yuv422p10le,
    Yuv444p,
    Yuv444p10le,
    /// 4-plane 10-bit 4:4:4 with alpha. Y/Cb/Cr stored as u16 LE in the
    /// 0..=1023 range (10-bit sample domain). Alpha stored as u16 LE in
    /// the 0..=65535 range (16-bit precision — RDD 36 §7.7 alpha stream
    /// carries 16-bit samples for `ap4h`/`ap4x`, we preserve that rather
    /// than re-quantize down to 10-bit). Matches the ffmpeg
    /// `yuva444p10le` naming convention but the alpha plane is
    /// effectively 16-bit — documented limitation for downstream pipeline
    /// consumers (which today only accept 8-bit YUV420p; roadmap item #5
    /// tracks 10-bit end-to-end, after which a further extension can
    /// carry alpha too).
    Yuva444p10le,
    Nv12,
    Nv21,
    Rgb24,
    Rgba32,
}

impl PixelFormat {
    /// Size in bytes of one tightly packed `width` × `height` frame
    /// (no per-row padding is accounted for).
    ///
    /// Formats deeper than 8 bits are counted at 2 bytes per sample.
    /// For chroma-subsampled formats the chroma plane dimensions are
    /// rounded **up**, so odd widths/heights are sized correctly: a 3×3
    /// `yuv420p` frame is 9 luma bytes plus two 2×2 chroma planes, 17
    /// bytes in total. For even dimensions this equals the familiar
    /// `pixels * 3 / 2` style formulas.
    ///
    /// The arithmetic is done in `usize` and is not overflow-checked;
    /// that is only a concern on targets where `usize` is narrower than
    /// 64 bits.
    pub fn bytes_per_frame(&self, width: u32, height: u32) -> usize {
        let (w, h) = (width as usize, height as usize);
        // Ceiling division by two, written so that it cannot overflow.
        let chroma_w = w / 2 + w % 2;
        let chroma_h = h / 2 + h % 2;
        let luma = w * h;
        // Combined sample count of both chroma planes (or of the single
        // interleaved chroma plane for NV12 / NV21).
        let chroma_420 = 2 * chroma_w * chroma_h;
        let chroma_422 = 2 * chroma_w * h;
        match self {
            Self::Yuv420p | Self::Nv12 | Self::Nv21 => luma + chroma_420,
            Self::Yuv420p10le | Self::Yuv420p12le => 2 * (luma + chroma_420),
            Self::Yuv422p => luma + chroma_422,
            Self::Yuv422p10le => 2 * (luma + chroma_422),
            Self::Yuv444p => luma * 3,
            Self::Yuv444p10le => luma * 6,
            // 4 planes × 2 bytes/sample. Alpha is 16-bit, Y/Cb/Cr are
            // 10-bit stored in 16-bit containers — total 8 bytes/pixel.
            Self::Yuva444p10le => luma * 8,
            Self::Rgb24 => luma * 3,
            Self::Rgba32 => luma * 4,
        }
    }

    /// ffmpeg-compatible string. Used in probe payloads so downstream
    /// consumers (Laravel, validators) see the same names the Python
    /// implementation emitted.
    ///
    /// Note that `Rgba32` is reported as `"rgba"`, not `"rgba32"`.
    pub fn as_ffmpeg_str(&self) -> &'static str {
        match self {
            Self::Yuv420p => "yuv420p",
            Self::Yuv420p10le => "yuv420p10le",
            Self::Yuv420p12le => "yuv420p12le",
            Self::Yuv422p => "yuv422p",
            Self::Yuv422p10le => "yuv422p10le",
            Self::Yuv444p => "yuv444p",
            Self::Yuv444p10le => "yuv444p10le",
            Self::Yuva444p10le => "yuva444p10le",
            Self::Nv12 => "nv12",
            Self::Nv21 => "nv21",
            Self::Rgb24 => "rgb24",
            Self::Rgba32 => "rgba",
        }
    }

    /// Picks the planar YUV format for a `(chroma_idc, bit_depth)` pair,
    /// where `chroma_idc` 1 / 2 / 3 selects 4:2:0 / 4:2:2 / 4:4:4.
    ///
    /// Only the combinations that have a variant are recognised. Every
    /// other pair — including 12-bit 4:2:2 / 4:4:4 and `chroma_idc` 0 —
    /// silently falls back to `Yuv420p`, so callers that need to reject
    /// unsupported streams must validate the inputs themselves.
    pub fn from_chroma_and_depth(chroma_idc: u8, bit_depth: u8) -> Self {
        match (chroma_idc, bit_depth) {
            (1, 8) => Self::Yuv420p,
            (1, 10) => Self::Yuv420p10le,
            (1, 12) => Self::Yuv420p12le,
            (2, 8) => Self::Yuv422p,
            (2, 10) => Self::Yuv422p10le,
            (3, 8) => Self::Yuv444p,
            (3, 10) => Self::Yuv444p10le,
            _ => Self::Yuv420p, // defensive default
        }
    }
}
111
/// Coarse colour-matrix classification carried on `VideoFrame` and
/// `StreamInfo`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ColorSpace {
    Bt601,
    Bt709,
    /// BT.2020, without distinguishing the non-constant-luminance and
    /// constant-luminance matrix variants (ITU-T H.273
    /// `matrix_coefficients` 9 and 10).
    ///
    /// That distinction rarely matters at the decode interface. Where it
    /// does — the mux's `colr nclx` box — the writer takes
    /// `matrix_coefficients` from the raw `u8` carried separately
    /// alongside `StreamInfo`; it is not re-derived from this variant.
    Bt2020,
}
124
/// Transfer characteristics per ITU-T H.273 §8.2 / H.265 Table E.4.
/// Carried on `StreamInfo` so the MP4 mux's `colr nclx` writer can
/// round-trip HDR10 (ST2084) and HLG content without losing metadata.
///
/// Separate from `ColorSpace` so existing call sites — every decoder
/// emits a `VideoFrame { color_space, .. }` and every colorspace
/// converter / encoder dispatches on it — continue to compile
/// unchanged. The transfer function is orthogonal to the matrix
/// coefficients for pipeline purposes; only the mux needs both.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum TransferFn {
    /// Rec. 709 (H.273 value 1). Default for SDR content.
    #[default]
    Bt709,
    /// Gamma 2.8 / BT.470BG (H.273 value 5).
    Bt470Bg,
    /// Linear (H.273 value 8).
    Linear,
    /// SMPTE ST 2084 / PQ (H.273 value 16). HDR10.
    St2084,
    /// ARIB STD-B67 / HLG (H.273 value 18). Broadcast HDR.
    AribStdB67,
    /// Unspecified or unmapped — consumers fall back to Bt709 gamma.
    Unspecified,
}

impl TransferFn {
    /// Map an H.273 `transfer_characteristics` value to the subset of
    /// transfers this pipeline knows about. Unknown values collapse
    /// to `Unspecified`.
    ///
    /// Mapping:
    /// - 1, 6, 14, 15 → `Bt709` (Rec.709 family)
    /// - 5 → `Bt470Bg` (gamma 2.8)
    /// - 8 → `Linear`
    /// - 16 → `St2084`
    /// - 18 → `AribStdB67`
    /// - anything else → `Unspecified`
    pub fn from_h273(value: u8) -> Self {
        match value {
            1 | 6 | 14 | 15 => Self::Bt709, // Rec.709 family
            // H.273 assigns 5 to the gamma-2.8 BT.470BG curve. Code 4 is
            // the gamma-2.2 BT.470M curve, which has no variant here and
            // therefore falls through to `Unspecified` below.
            5 => Self::Bt470Bg,
            8 => Self::Linear,
            16 => Self::St2084,
            18 => Self::AribStdB67,
            _ => Self::Unspecified,
        }
    }
}
166
167#[derive(Debug, Clone)]
168pub struct VideoFrame {
169    pub data: Bytes,
170    pub width: u32,
171    pub height: u32,
172    pub format: PixelFormat,
173    pub color_space: ColorSpace,
174    pub pts: u64,
175}
176
177impl VideoFrame {
178    pub fn new(
179        data: Bytes,
180        width: u32,
181        height: u32,
182        format: PixelFormat,
183        color_space: ColorSpace,
184        pts: u64,
185    ) -> Self {
186        Self {
187            data,
188            width,
189            height,
190            format,
191            color_space,
192            pts,
193        }
194    }
195}
196
197#[derive(Debug, Clone)]
198pub struct StreamInfo {
199    pub codec: String,
200    pub width: u32,
201    pub height: u32,
202    pub frame_rate: f64,
203    pub duration: f64,
204    pub pixel_format: PixelFormat,
205    pub color_space: ColorSpace,
206    pub total_frames: u64,
207    pub bitrate: u64,
208    /// HDR-relevant metadata. Bundled into one sub-struct that defaults
209    /// to SDR BT.709 so every existing `StreamInfo { ... }` literal in
210    /// the codebase compiles unchanged via `..Default::default()` or
211    /// direct field init; only HDR-aware sites (nvdec sequence_callback,
212    /// HEVC/AV1 SPS/VUI parsers, MP4 mux `colr nclx` writer) populate
213    /// non-default values.
214    pub color_metadata: ColorMetadata,
215}
216
217/// HDR / wide-gamut metadata carried from SPS VUI through the pipeline
218/// to the MP4 mux `colr nclx` box. All values default to an SDR
219/// BT.709 baseline so un-annotated StreamInfo constructions stay
220/// backward-compatible.
221#[derive(Debug, Clone, Copy, PartialEq, Eq)]
222pub struct ColorMetadata {
223    /// Transfer function per ITU-T H.273. Defaults to Bt709 for SDR.
224    /// HDR10 streams populate this with St2084 (PQ); HLG broadcasts
225    /// with AribStdB67.
226    pub transfer: TransferFn,
227    /// Raw H.273 `matrix_coefficients` (0–255). Preserves the ncl/cl
228    /// distinction the ColorSpace enum collapses: 9 = BT.2020 NCL,
229    /// 10 = BT.2020 CL. Mux writes this verbatim into `colr nclx`.
230    pub matrix_coefficients: u8,
231    /// Raw H.273 `colour_primaries` (0–255). Written verbatim into
232    /// `colr nclx`.
233    pub colour_primaries: u8,
234    /// `full_range_flag` (H.273): false = studio/limited-range (16..235),
235    /// true = full-range (0..255). HEVC SPS VUI exposes this directly.
236    pub full_range: bool,
237    /// HDR10 mastering display color volume (SMPTE ST 2086, HEVC SEI 137,
238    /// AV1 metadata OBU type 2 HDR_MDCV, MP4 `mdcv` box, MKV
239    /// `MasteringMetadata`). `None` for SDR sources or when the
240    /// upstream did not signal it. Carried to the MP4 mux's `mdcv` box
241    /// per ISO/IEC 14496-12 §12.1.6 / AV1-ISOBMFF v1.3.0 (Squad-20).
242    /// Populated by Squad-21 from HEVC SEI 137 / AV1 metadata OBU
243    /// `METADATA_TYPE_HDR_MDCV` / MP4 `mdcv` / MKV `MasteringMetadata`.
244    /// Without it, Apple devices fall back to BT.709 limited even when
245    /// `colr nclx` signals BT.2020.
246    pub mastering_display: Option<MasteringDisplay>,
247    /// HDR10 content light level info (CTA-861.3, HEVC SEI 144, AV1
248    /// metadata OBU type 1 HDR_CLL, MP4 `clli`, MKV `MaxCLL` +
249    /// `MaxFALL`). `None` for SDR or unsignalled HDR. Carried to the
250    /// MP4 mux's `clli` box per ISO/IEC 14496-12 §12.1.6 / AV1-ISOBMFF
251    /// v1.3.0 (Squad-20). Populated by Squad-21 from HEVC SEI 144 / AV1
252    /// metadata OBU `METADATA_TYPE_HDR_CLL` / MP4 `clli` / MKV.
253    pub content_light_level: Option<ContentLightLevel>,
254}
255
256impl Default for ColorMetadata {
257    fn default() -> Self {
258        // SDR BT.709 baseline: matrix=1, primaries=1, transfer=Bt709,
259        // studio range. Matches the implicit behavior of every existing
260        // decoder that didn't previously populate color metadata.
261        Self {
262            transfer: TransferFn::Bt709,
263            matrix_coefficients: 1,
264            colour_primaries: 1,
265            full_range: false,
266            mastering_display: None,
267            content_light_level: None,
268        }
269    }
270}
271
/// HDR10 Mastering Display Colour Volume, as defined by SMPTE ST 2086 /
/// HEVC SEI message 137 (D.2.28 in the H.265 spec) / AV1 Metadata OBU
/// `METADATA_TYPE_HDR_MDCV`.
///
/// In the MP4 `mdcv` box this is wire-encoded as 8 × u16 BE
/// (primaries and white point) followed by 2 × u32 BE (luminance), a
/// 24-byte payload in total.
///
/// **Units (per the spec):**
/// - `primaries_*_x`, `primaries_*_y` and `white_point_*` count
///   increments of 0.00002 on the CIE 1931 chromaticity diagram. The
///   stored value is the unscaled u16, e.g. BT.2020 red x = 0.708 is
///   `0.708 / 0.00002 = 35400`.
/// - `max_luminance` and `min_luminance` count increments of 0.0001
///   cd/m² (nits). The stored value is the unscaled u32, e.g.
///   1000 nits is `10_000_000`.
///
/// **Field-name contract** with Squad-21 (probe HDR): the field names
/// are load-bearing — the probe imports this struct and fills it
/// directly from the SEI/OBU payload. Do not rename without
/// coordinating.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct MasteringDisplay {
    pub primaries_r_x: u16,
    pub primaries_r_y: u16,
    pub primaries_g_x: u16,
    pub primaries_g_y: u16,
    pub primaries_b_x: u16,
    pub primaries_b_y: u16,
    pub white_point_x: u16,
    pub white_point_y: u16,
    pub max_luminance: u32,
    pub min_luminance: u32,
}
303
/// HDR10 Content Light Level Information, as defined by CTA-861.3 /
/// HEVC SEI 144 (content_light_level_info) / AV1 Metadata OBU
/// `METADATA_TYPE_HDR_CLL`.
///
/// In the MP4 `clli` box this is wire-encoded as 2 × u16 BE, a 4-byte
/// payload in total.
///
/// **Units (per the spec):**
/// - `max_cll` — Maximum Content Light Level: the luminance of the
///   brightest pixel anywhere in the stream, in cd/m² (integer nits).
/// - `max_fall` — Maximum Frame-Average Light Level: the highest
///   per-frame average luminance, in cd/m² (integer nits).
///
/// **Field-name contract** with Squad-21: the field names are
/// load-bearing; do not rename without coordinating.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct ContentLightLevel {
    pub max_cll: u16,
    pub max_fall: u16,
}