Skip to main content

codec/
frame.rs

1use bytes::Bytes;
2
/// Raw pixel layouts a decoded frame can be stored in.
///
/// Variant names mirror the ffmpeg `pix_fmt` naming scheme; the exact
/// strings are produced by [`PixelFormat::as_ffmpeg_str`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PixelFormat {
    Yuv420p,
    Yuv420p10le,
    Yuv420p12le,
    Yuv422p,
    Yuv422p10le,
    Yuv444p,
    Yuv444p10le,
    /// 4-plane 10-bit 4:4:4 with alpha. Y/Cb/Cr are stored as u16 LE in
    /// the 0..=1023 range (10-bit sample domain). Alpha is stored as
    /// u16 LE in the 0..=65535 range (16-bit precision — the RDD 36 §7.7
    /// alpha stream carries 16-bit samples for `ap4h`/`ap4x`, and that
    /// precision is preserved rather than re-quantized down to 10-bit).
    /// The name matches ffmpeg's `yuva444p10le`, but the alpha plane is
    /// effectively 16-bit — a documented limitation for downstream
    /// pipeline consumers (which today only accept 8-bit YUV420p;
    /// roadmap item #5 tracks 10-bit end-to-end, after which a further
    /// extension can carry alpha too).
    Yuva444p10le,
    Nv12,
    Nv21,
    Rgb24,
    Rgba32,
}

impl PixelFormat {
    /// Size in bytes of one tightly packed `width` × `height` frame.
    ///
    /// Chroma planes of subsampled formats are sized with their
    /// dimensions rounded **up**, so odd widths/heights get room for the
    /// trailing chroma column/row. For even dimensions the result equals
    /// the classic `pixels * 3 / 2` (4:2:0) and `pixels * 2` (4:2:2)
    /// shortcuts. Formats with more than 8 bits per sample use two bytes
    /// per sample.
    ///
    /// Arithmetic is plain `usize`; absurdly large dimensions can
    /// overflow (which panics in builds with overflow checks enabled).
    pub fn bytes_per_frame(&self, width: u32, height: u32) -> usize {
        // Ceiling division by two, written so it cannot overflow even
        // when `n == usize::MAX`.
        fn half_up(n: usize) -> usize {
            n / 2 + n % 2
        }

        let w = width as usize;
        let h = height as usize;
        let luma = w * h;
        // Sample counts (not bytes) for the subsampled layouts: one
        // full-resolution luma plane plus two reduced chroma planes
        // (or one interleaved plane of the same total size for NV12/NV21).
        let samples_420 = luma + 2 * half_up(w) * half_up(h);
        let samples_422 = luma + 2 * half_up(w) * h;

        match self {
            Self::Yuv420p | Self::Nv12 | Self::Nv21 => samples_420,
            Self::Yuv420p10le | Self::Yuv420p12le => samples_420 * 2,
            Self::Yuv422p => samples_422,
            Self::Yuv422p10le => samples_422 * 2,
            Self::Yuv444p => luma * 3,
            Self::Yuv444p10le => luma * 6,
            // 4 planes × 2 bytes/sample. Alpha is 16-bit, Y/Cb/Cr are
            // 10-bit stored in 16-bit containers — total 8 bytes/pixel.
            Self::Yuva444p10le => luma * 8,
            Self::Rgb24 => luma * 3,
            Self::Rgba32 => luma * 4,
        }
    }

    /// ffmpeg-compatible name of the format. Used in probe payloads so
    /// downstream consumers (Laravel, validators) see the same names the
    /// Python implementation emitted.
    pub fn as_ffmpeg_str(&self) -> &'static str {
        match self {
            Self::Yuv420p => "yuv420p",
            Self::Yuv420p10le => "yuv420p10le",
            Self::Yuv420p12le => "yuv420p12le",
            Self::Yuv422p => "yuv422p",
            Self::Yuv422p10le => "yuv422p10le",
            Self::Yuv444p => "yuv444p",
            Self::Yuv444p10le => "yuv444p10le",
            Self::Yuva444p10le => "yuva444p10le",
            Self::Nv12 => "nv12",
            Self::Nv21 => "nv21",
            Self::Rgb24 => "rgb24",
            Self::Rgba32 => "rgba",
        }
    }

    /// Pick the planar YUV format for a `(chroma_idc, bit_depth)` pair.
    ///
    /// `chroma_idc` 1, 2 and 3 select the 4:2:0, 4:2:2 and 4:4:4
    /// variants respectively. Any combination without a dedicated
    /// variant (including 12-bit 4:2:2 / 4:4:4 and `chroma_idc == 0`)
    /// falls back to [`PixelFormat::Yuv420p`].
    pub fn from_chroma_and_depth(chroma_idc: u8, bit_depth: u8) -> Self {
        match (chroma_idc, bit_depth) {
            (1, 8) => Self::Yuv420p,
            (1, 10) => Self::Yuv420p10le,
            (1, 12) => Self::Yuv420p12le,
            (2, 8) => Self::Yuv422p,
            (2, 10) => Self::Yuv422p10le,
            (3, 8) => Self::Yuv444p,
            (3, 10) => Self::Yuv444p10le,
            _ => Self::Yuv420p, // defensive default
        }
    }
}
80
/// Colour matrix family attached to decoded frames and stream
/// descriptions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ColorSpace {
    Bt601,
    Bt709,
    /// Stands for both BT.2020 matrix variants — non-constant and
    /// constant luminance (ITU-T H.273 `matrix_coefficients` 9 and 10).
    /// The difference rarely matters at the decode interface: the
    /// exact value that the downstream mux writes into the `colr nclx`
    /// box travels separately as the raw `matrix_coefficients` u8 on
    /// `StreamInfo` and is not re-derived from this enum.
    Bt2020,
}
93
/// Transfer characteristics per ITU-T H.273 §8.2 / H.265 Table E.4.
/// Carried on `StreamInfo` so the MP4 mux's `colr nclx` writer can
/// round-trip HDR10 (ST2084) and HLG content without losing metadata.
///
/// Kept separate from `ColorSpace` so existing call sites — every
/// decoder emits a `VideoFrame { color_space, .. }` and every
/// colorspace converter / encoder dispatches on it — continue to
/// compile unchanged. The transfer function is orthogonal to the
/// matrix coefficients for pipeline purposes; only the mux needs both.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum TransferFn {
    /// Rec. 709 transfer (H.273 value 1, plus the functionally
    /// equivalent values 6, 14 and 15). Default for SDR content.
    #[default]
    Bt709,
    /// Gamma 2.8 / BT.470 System B, G (H.273 value 5).
    Bt470Bg,
    /// Linear (H.273 value 8).
    Linear,
    /// SMPTE ST 2084 / PQ (H.273 value 16). HDR10.
    St2084,
    /// ARIB STD-B67 / HLG (H.273 value 18). Broadcast HDR.
    AribStdB67,
    /// Unspecified or unmapped — consumers fall back to Bt709 gamma.
    Unspecified,
}

impl TransferFn {
    /// Map an H.273 `transfer_characteristics` value to the subset of
    /// transfers this pipeline knows about. Unknown values collapse
    /// to `Unspecified`.
    ///
    /// Note that value 4 (BT.470 System M, gamma 2.2) has no dedicated
    /// variant and therefore also lands in `Unspecified`; it must not be
    /// confused with value 5 (BT.470 System B, G, gamma 2.8).
    pub fn from_h273(value: u8) -> Self {
        match value {
            1 | 6 | 14 | 15 => Self::Bt709, // Rec.709 family
            5 => Self::Bt470Bg,
            8 => Self::Linear,
            16 => Self::St2084,
            18 => Self::AribStdB67,
            _ => Self::Unspecified,
        }
    }
}
135
136#[derive(Debug, Clone)]
137pub struct VideoFrame {
138    pub data: Bytes,
139    pub width: u32,
140    pub height: u32,
141    pub format: PixelFormat,
142    pub color_space: ColorSpace,
143    pub pts: u64,
144}
145
146impl VideoFrame {
147    pub fn new(
148        data: Bytes,
149        width: u32,
150        height: u32,
151        format: PixelFormat,
152        color_space: ColorSpace,
153        pts: u64,
154    ) -> Self {
155        Self {
156            data,
157            width,
158            height,
159            format,
160            color_space,
161            pts,
162        }
163    }
164}
165
166#[derive(Debug, Clone)]
167pub struct StreamInfo {
168    pub codec: String,
169    pub width: u32,
170    pub height: u32,
171    pub frame_rate: f64,
172    pub duration: f64,
173    pub pixel_format: PixelFormat,
174    pub color_space: ColorSpace,
175    pub total_frames: u64,
176    pub bitrate: u64,
177    /// HDR-relevant metadata. Bundled into one sub-struct that defaults
178    /// to SDR BT.709 so every existing `StreamInfo { ... }` literal in
179    /// the codebase compiles unchanged via `..Default::default()` or
180    /// direct field init; only HDR-aware sites (nvdec sequence_callback,
181    /// HEVC/AV1 SPS/VUI parsers, MP4 mux `colr nclx` writer) populate
182    /// non-default values.
183    pub color_metadata: ColorMetadata,
184}
185
186/// HDR / wide-gamut metadata carried from SPS VUI through the pipeline
187/// to the MP4 mux `colr nclx` box. All values default to an SDR
188/// BT.709 baseline so un-annotated StreamInfo constructions stay
189/// backward-compatible.
190#[derive(Debug, Clone, Copy, PartialEq, Eq)]
191pub struct ColorMetadata {
192    /// Transfer function per ITU-T H.273. Defaults to Bt709 for SDR.
193    /// HDR10 streams populate this with St2084 (PQ); HLG broadcasts
194    /// with AribStdB67.
195    pub transfer: TransferFn,
196    /// Raw H.273 `matrix_coefficients` (0–255). Preserves the ncl/cl
197    /// distinction the ColorSpace enum collapses: 9 = BT.2020 NCL,
198    /// 10 = BT.2020 CL. Mux writes this verbatim into `colr nclx`.
199    pub matrix_coefficients: u8,
200    /// Raw H.273 `colour_primaries` (0–255). Written verbatim into
201    /// `colr nclx`.
202    pub colour_primaries: u8,
203    /// `full_range_flag` (H.273): false = studio/limited-range (16..235),
204    /// true = full-range (0..255). HEVC SPS VUI exposes this directly.
205    pub full_range: bool,
206    /// HDR10 mastering display color volume (SMPTE ST 2086, HEVC SEI 137,
207    /// AV1 metadata OBU type 2 HDR_MDCV, MP4 `mdcv` box, MKV
208    /// `MasteringMetadata`). `None` for SDR sources or when the
209    /// upstream did not signal it. Carried to the MP4 mux's `mdcv` box
210    /// per ISO/IEC 14496-12 §12.1.6 / AV1-ISOBMFF v1.3.0 (Squad-20).
211    /// Populated by Squad-21 from HEVC SEI 137 / AV1 metadata OBU
212    /// `METADATA_TYPE_HDR_MDCV` / MP4 `mdcv` / MKV `MasteringMetadata`.
213    /// Without it, Apple devices fall back to BT.709 limited even when
214    /// `colr nclx` signals BT.2020.
215    pub mastering_display: Option<MasteringDisplay>,
216    /// HDR10 content light level info (CTA-861.3, HEVC SEI 144, AV1
217    /// metadata OBU type 1 HDR_CLL, MP4 `clli`, MKV `MaxCLL` +
218    /// `MaxFALL`). `None` for SDR or unsignalled HDR. Carried to the
219    /// MP4 mux's `clli` box per ISO/IEC 14496-12 §12.1.6 / AV1-ISOBMFF
220    /// v1.3.0 (Squad-20). Populated by Squad-21 from HEVC SEI 144 / AV1
221    /// metadata OBU `METADATA_TYPE_HDR_CLL` / MP4 `clli` / MKV.
222    pub content_light_level: Option<ContentLightLevel>,
223}
224
225impl Default for ColorMetadata {
226    fn default() -> Self {
227        // SDR BT.709 baseline: matrix=1, primaries=1, transfer=Bt709,
228        // studio range. Matches the implicit behavior of every existing
229        // decoder that didn't previously populate color metadata.
230        Self {
231            transfer: TransferFn::Bt709,
232            matrix_coefficients: 1,
233            colour_primaries: 1,
234            full_range: false,
235            mastering_display: None,
236            content_light_level: None,
237        }
238    }
239}
240
/// HDR10 Mastering Display Colour Volume, as defined by SMPTE ST 2086 /
/// HEVC SEI message 137 (D.2.28 in the H.265 spec) / AV1 Metadata OBU
/// `METADATA_TYPE_HDR_MDCV`. In the MP4 `mdcv` box it is wire-encoded
/// as 8 × u16 BE (primaries and white point) followed by 2 × u32 BE
/// (luminance), i.e. a 24-byte payload.
///
/// **Units (per the spec):**
/// - `primaries_*_x`, `primaries_*_y` and `white_point_*` count steps
///   of 0.00002 on the CIE 1931 chromaticity diagram. The unscaled u16
///   is what goes on the wire (e.g. BT.2020 red x = 0.708 →
///   `0.708 / 0.00002 = 35400`).
/// - `max_luminance` and `min_luminance` count steps of 0.0001 cd/m²
///   (nits). The unscaled u32 is what goes on the wire (e.g.
///   1000 nits → `10_000_000`).
///
/// **Field-name contract** with Squad-21 (probe HDR): the names are
/// load-bearing — the probe imports this struct and fills it straight
/// from the SEI/OBU payload. Do not rename without coordinating.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct MasteringDisplay {
    pub primaries_r_x: u16,
    pub primaries_r_y: u16,
    pub primaries_g_x: u16,
    pub primaries_g_y: u16,
    pub primaries_b_x: u16,
    pub primaries_b_y: u16,
    pub white_point_x: u16,
    pub white_point_y: u16,
    pub max_luminance: u32,
    pub min_luminance: u32,
}
272
/// HDR10 Content Light Level Information, as defined by CTA-861.3 /
/// HEVC SEI 144 (content_light_level_info) / AV1 Metadata OBU
/// `METADATA_TYPE_HDR_CLL`. In the MP4 `clli` box it is wire-encoded
/// as 2 × u16 BE, i.e. a 4-byte payload.
///
/// **Units (per the spec):**
/// - `max_cll` — Maximum Content Light Level: peak luminance of the
///   brightest pixel anywhere in the stream, in cd/m² (integer nits).
/// - `max_fall` — Maximum Frame-Average Light Level: the highest
///   per-frame average luminance, in cd/m² (integer nits).
///
/// **Field-name contract** with Squad-21: the names are load-bearing;
/// do not rename without coordinating.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ContentLightLevel {
    pub max_cll: u16,
    pub max_fall: u16,
}