codec/frame.rs
1use bytes::Bytes;
2
/// Video codec produced by the encoder and written by the muxer.
///
/// AV1 is the project default because it is royalty-clean. H.264 and H.265
/// remain selectable for compatibility with legacy players, but both carry
/// patent-licensing obligations — see the docs.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum VideoCodec {
    /// AV1 — the default codec.
    #[default]
    Av1,
    /// H.264 / AVC.
    H264,
    /// H.265 / HEVC.
    H265,
}

impl VideoCodec {
    /// Returns the short lowercase label: `"av1"`, `"h264"` or `"h265"`.
    pub fn label(self) -> &'static str {
        let (label, _) = self.label_and_fourcc();
        label
    }

    /// Returns the ISOBMFF visual sample-entry fourcc: `av01`, `avc1` or
    /// `hvc1`.
    pub fn sample_entry_fourcc(self) -> &'static str {
        let (_, fourcc) = self.label_and_fourcc();
        fourcc
    }

    /// Single lookup table backing both public accessors, as
    /// `(label, sample-entry fourcc)`.
    fn label_and_fourcc(self) -> (&'static str, &'static str) {
        match self {
            Self::Av1 => ("av1", "av01"),
            Self::H264 => ("h264", "avc1"),
            Self::H265 => ("h265", "hvc1"),
        }
    }
}
33
/// Raw pixel layouts handled by the pipeline.
///
/// Variant names mirror ffmpeg's `pix_fmt` names; see
/// [`PixelFormat::as_ffmpeg_str`] for the exact strings.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PixelFormat {
    Yuv420p,
    Yuv420p10le,
    Yuv420p12le,
    Yuv422p,
    Yuv422p10le,
    Yuv444p,
    Yuv444p10le,
    /// 4-plane 10-bit 4:4:4 with alpha. Y/Cb/Cr are stored as u16 LE in the
    /// 0..=1023 range (10-bit sample domain). Alpha is stored as u16 LE in
    /// the 0..=65535 range: the RDD 36 §7.7 alpha stream carries 16-bit
    /// samples for `ap4h`/`ap4x`, and that precision is preserved rather
    /// than re-quantized down to 10-bit.
    ///
    /// The name follows ffmpeg's `yuva444p10le`, but the alpha plane is
    /// effectively 16-bit. This is a documented limitation for downstream
    /// pipeline consumers, which today only accept 8-bit YUV420p; roadmap
    /// item #5 tracks 10-bit end-to-end, after which a further extension can
    /// carry alpha too.
    Yuva444p10le,
    Nv12,
    Nv21,
    Rgb24,
    Rgba32,
}

impl PixelFormat {
    /// Returns the size in bytes of one tightly packed `width` × `height`
    /// frame in this format (no row padding).
    ///
    /// For horizontally and/or vertically subsampled formats the chroma
    /// plane dimensions are rounded **up**, so frames with odd dimensions
    /// still get one chroma sample for the trailing column/row (a 1×1
    /// `Yuv420p` frame is 3 bytes, not 1). For even dimensions the result is
    /// identical to `width * height * bytes_per_pixel`.
    ///
    /// All arithmetic saturates at `usize::MAX` instead of overflowing, so
    /// absurd dimensions from a corrupt header yield an unallocatable size
    /// rather than a wrapped, too-small one.
    pub fn bytes_per_frame(&self, width: u32, height: u32) -> usize {
        let (w, h) = (width as usize, height as usize);
        // Subsampled chroma dimensions, rounded up. Written as `n / 2 + n % 2`
        // so the rounding itself cannot overflow.
        let (half_w, half_h) = (w / 2 + w % 2, h / 2 + h % 2);
        let plane = |cols: usize, rows: usize| cols.saturating_mul(rows);
        let luma = plane(w, h);

        // Total number of stored samples across all planes.
        let samples = match self {
            // 4:2:0 — chroma halved in both directions. NV12/NV21 interleave
            // Cb/Cr in one plane, which holds the same number of samples.
            Self::Yuv420p
            | Self::Yuv420p10le
            | Self::Yuv420p12le
            | Self::Nv12
            | Self::Nv21 => luma.saturating_add(plane(half_w, half_h).saturating_mul(2)),
            // 4:2:2 — chroma halved horizontally only.
            Self::Yuv422p | Self::Yuv422p10le => {
                luma.saturating_add(plane(half_w, h).saturating_mul(2))
            }
            // Three full-resolution components.
            Self::Yuv444p | Self::Yuv444p10le | Self::Rgb24 => luma.saturating_mul(3),
            // Four full-resolution components.
            Self::Yuva444p10le | Self::Rgba32 => luma.saturating_mul(4),
        };

        // 8-bit formats use one byte per sample; every higher bit depth is
        // stored in a 16-bit container.
        let bytes_per_sample = match self {
            Self::Yuv420p
            | Self::Yuv422p
            | Self::Yuv444p
            | Self::Nv12
            | Self::Nv21
            | Self::Rgb24
            | Self::Rgba32 => 1,
            Self::Yuv420p10le
            | Self::Yuv420p12le
            | Self::Yuv422p10le
            | Self::Yuv444p10le
            | Self::Yuva444p10le => 2,
        };

        samples.saturating_mul(bytes_per_sample)
    }

    /// Returns the ffmpeg-compatible format name.
    ///
    /// Used in probe payloads so downstream consumers (Laravel, validators)
    /// see the same names the Python implementation emitted. Note that
    /// `Rgba32` is spelled `"rgba"`.
    pub fn as_ffmpeg_str(&self) -> &'static str {
        match self {
            Self::Yuv420p => "yuv420p",
            Self::Yuv420p10le => "yuv420p10le",
            Self::Yuv420p12le => "yuv420p12le",
            Self::Yuv422p => "yuv422p",
            Self::Yuv422p10le => "yuv422p10le",
            Self::Yuv444p => "yuv444p",
            Self::Yuv444p10le => "yuv444p10le",
            Self::Yuva444p10le => "yuva444p10le",
            Self::Nv12 => "nv12",
            Self::Nv21 => "nv21",
            Self::Rgb24 => "rgb24",
            Self::Rgba32 => "rgba",
        }
    }

    /// Picks the planar YUV format for a chroma format indicator
    /// (1 = 4:2:0, 2 = 4:2:2, 3 = 4:4:4) and a bit depth.
    ///
    /// Any combination without a matching variant — including
    /// `chroma_idc == 0` and 12-bit 4:2:2 / 4:4:4 — falls back to
    /// [`PixelFormat::Yuv420p`].
    pub fn from_chroma_and_depth(chroma_idc: u8, bit_depth: u8) -> Self {
        match (chroma_idc, bit_depth) {
            (1, 8) => Self::Yuv420p,
            (1, 10) => Self::Yuv420p10le,
            (1, 12) => Self::Yuv420p12le,
            (2, 8) => Self::Yuv422p,
            (2, 10) => Self::Yuv422p10le,
            (3, 8) => Self::Yuv444p,
            (3, 10) => Self::Yuv444p10le,
            _ => Self::Yuv420p, // defensive default
        }
    }
}
111
/// Colour matrix family attached to decoded frames and stream descriptions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ColorSpace {
    /// BT.601.
    Bt601,
    /// BT.709.
    Bt709,
    /// BT.2020, covering both the non-constant-luminance and the
    /// constant-luminance matrix variants (ITU-T H.273 `matrix_coefficients`
    /// 9 and 10).
    ///
    /// The distinction rarely matters at the decode interface. The mux does
    /// not re-derive it from this enum: the value it writes into the
    /// `colr nclx` box comes from the raw `matrix_coefficients` u8 that
    /// travels separately with the stream's colour metadata.
    Bt2020,
}
124
/// Transfer characteristics per ITU-T H.273 §8.2 / H.265 Table E.4.
///
/// Carried on `StreamInfo` so the MP4 mux's `colr nclx` writer can
/// round-trip HDR10 (ST 2084) and HLG content without losing metadata.
///
/// Kept separate from `ColorSpace` so existing call sites — every decoder
/// emits a `VideoFrame { color_space, .. }` and every colorspace converter /
/// encoder dispatches on it — continue to compile unchanged. The transfer
/// function is orthogonal to the matrix coefficients for pipeline purposes;
/// only the mux needs both.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum TransferFn {
    /// Rec. 709 (H.273 value 1). Default for SDR content.
    #[default]
    Bt709,
    /// Gamma 2.8 / BT.470 System B, G (H.273 value 5).
    Bt470Bg,
    /// Linear (H.273 value 8).
    Linear,
    /// SMPTE ST 2084 / PQ (H.273 value 16). HDR10.
    St2084,
    /// ARIB STD-B67 / HLG (H.273 value 18). Broadcast HDR.
    AribStdB67,
    /// Unspecified or unmapped — consumers fall back to Bt709 gamma.
    Unspecified,
}

impl TransferFn {
    /// Maps an H.273 `transfer_characteristics` code point to the subset of
    /// transfers this pipeline knows about.
    ///
    /// Values 1, 6, 14 and 15 are treated as one Rec. 709 family. Every
    /// value without a dedicated variant collapses to
    /// [`TransferFn::Unspecified`]; that includes value 4 (gamma 2.2,
    /// BT.470 System M), which must not be confused with value 5
    /// (gamma 2.8, BT.470 System B/G).
    pub fn from_h273(value: u8) -> Self {
        match value {
            1 | 6 | 14 | 15 => Self::Bt709, // Rec.709 family
            5 => Self::Bt470Bg,
            8 => Self::Linear,
            16 => Self::St2084,
            18 => Self::AribStdB67,
            _ => Self::Unspecified,
        }
    }
}
166
167#[derive(Debug, Clone)]
168pub struct VideoFrame {
169 pub data: Bytes,
170 pub width: u32,
171 pub height: u32,
172 pub format: PixelFormat,
173 pub color_space: ColorSpace,
174 pub pts: u64,
175}
176
177impl VideoFrame {
178 pub fn new(
179 data: Bytes,
180 width: u32,
181 height: u32,
182 format: PixelFormat,
183 color_space: ColorSpace,
184 pts: u64,
185 ) -> Self {
186 Self {
187 data,
188 width,
189 height,
190 format,
191 color_space,
192 pts,
193 }
194 }
195}
196
197#[derive(Debug, Clone)]
198pub struct StreamInfo {
199 pub codec: String,
200 pub width: u32,
201 pub height: u32,
202 pub frame_rate: f64,
203 pub duration: f64,
204 pub pixel_format: PixelFormat,
205 pub color_space: ColorSpace,
206 pub total_frames: u64,
207 pub bitrate: u64,
208 /// HDR-relevant metadata. Bundled into one sub-struct that defaults
209 /// to SDR BT.709 so every existing `StreamInfo { ... }` literal in
210 /// the codebase compiles unchanged via `..Default::default()` or
211 /// direct field init; only HDR-aware sites (nvdec sequence_callback,
212 /// HEVC/AV1 SPS/VUI parsers, MP4 mux `colr nclx` writer) populate
213 /// non-default values.
214 pub color_metadata: ColorMetadata,
215}
216
217/// HDR / wide-gamut metadata carried from SPS VUI through the pipeline
218/// to the MP4 mux `colr nclx` box. All values default to an SDR
219/// BT.709 baseline so un-annotated StreamInfo constructions stay
220/// backward-compatible.
221#[derive(Debug, Clone, Copy, PartialEq, Eq)]
222pub struct ColorMetadata {
223 /// Transfer function per ITU-T H.273. Defaults to Bt709 for SDR.
224 /// HDR10 streams populate this with St2084 (PQ); HLG broadcasts
225 /// with AribStdB67.
226 pub transfer: TransferFn,
227 /// Raw H.273 `matrix_coefficients` (0–255). Preserves the ncl/cl
228 /// distinction the ColorSpace enum collapses: 9 = BT.2020 NCL,
229 /// 10 = BT.2020 CL. Mux writes this verbatim into `colr nclx`.
230 pub matrix_coefficients: u8,
231 /// Raw H.273 `colour_primaries` (0–255). Written verbatim into
232 /// `colr nclx`.
233 pub colour_primaries: u8,
234 /// `full_range_flag` (H.273): false = studio/limited-range (16..235),
235 /// true = full-range (0..255). HEVC SPS VUI exposes this directly.
236 pub full_range: bool,
237 /// HDR10 mastering display color volume (SMPTE ST 2086, HEVC SEI 137,
238 /// AV1 metadata OBU type 2 HDR_MDCV, MP4 `mdcv` box, MKV
239 /// `MasteringMetadata`). `None` for SDR sources or when the
240 /// upstream did not signal it. Carried to the MP4 mux's `mdcv` box
241 /// per ISO/IEC 14496-12 §12.1.6 / AV1-ISOBMFF v1.3.0 (Squad-20).
242 /// Populated by Squad-21 from HEVC SEI 137 / AV1 metadata OBU
243 /// `METADATA_TYPE_HDR_MDCV` / MP4 `mdcv` / MKV `MasteringMetadata`.
244 /// Without it, Apple devices fall back to BT.709 limited even when
245 /// `colr nclx` signals BT.2020.
246 pub mastering_display: Option<MasteringDisplay>,
247 /// HDR10 content light level info (CTA-861.3, HEVC SEI 144, AV1
248 /// metadata OBU type 1 HDR_CLL, MP4 `clli`, MKV `MaxCLL` +
249 /// `MaxFALL`). `None` for SDR or unsignalled HDR. Carried to the
250 /// MP4 mux's `clli` box per ISO/IEC 14496-12 §12.1.6 / AV1-ISOBMFF
251 /// v1.3.0 (Squad-20). Populated by Squad-21 from HEVC SEI 144 / AV1
252 /// metadata OBU `METADATA_TYPE_HDR_CLL` / MP4 `clli` / MKV.
253 pub content_light_level: Option<ContentLightLevel>,
254}
255
256impl Default for ColorMetadata {
257 fn default() -> Self {
258 // SDR BT.709 baseline: matrix=1, primaries=1, transfer=Bt709,
259 // studio range. Matches the implicit behavior of every existing
260 // decoder that didn't previously populate color metadata.
261 Self {
262 transfer: TransferFn::Bt709,
263 matrix_coefficients: 1,
264 colour_primaries: 1,
265 full_range: false,
266 mastering_display: None,
267 content_light_level: None,
268 }
269 }
270}
271
/// HDR10 Mastering Display Colour Volume, per SMPTE ST 2086 / HEVC SEI
/// message 137 (D.2.28 in the H.265 spec) / AV1 Metadata OBU
/// `METADATA_TYPE_HDR_MDCV`.
///
/// In the MP4 `mdcv` box this is encoded as 8 × u16 BE (primaries and
/// white point) followed by 2 × u32 BE (luminance): a 24-byte payload.
///
/// # Units (per the spec)
///
/// - Chromaticity fields (`primaries_*_x`, `primaries_*_y`,
///   `white_point_*`) count increments of 0.00002 on the CIE 1931
///   chromaticity diagram. The stored value is the unscaled u16, e.g.
///   BT.2020 red x = 0.708 → `0.708 / 0.00002 = 35400`.
/// - `max_luminance` and `min_luminance` count increments of 0.0001 cd/m²
///   (nits). The stored value is the unscaled u32, e.g. 1000 nits →
///   `10_000_000`.
///
/// # Field-name contract
///
/// The field names are load-bearing for Squad-21 (probe HDR): the probe
/// imports this struct and fills it directly from the SEI/OBU payload. Do
/// not rename anything without coordinating.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct MasteringDisplay {
    /// Red primary, x chromaticity.
    pub primaries_r_x: u16,
    /// Red primary, y chromaticity.
    pub primaries_r_y: u16,
    /// Green primary, x chromaticity.
    pub primaries_g_x: u16,
    /// Green primary, y chromaticity.
    pub primaries_g_y: u16,
    /// Blue primary, x chromaticity.
    pub primaries_b_x: u16,
    /// Blue primary, y chromaticity.
    pub primaries_b_y: u16,
    /// White point, x chromaticity.
    pub white_point_x: u16,
    /// White point, y chromaticity.
    pub white_point_y: u16,
    /// Maximum display luminance, in 0.0001 cd/m² steps.
    pub max_luminance: u32,
    /// Minimum display luminance, in 0.0001 cd/m² steps.
    pub min_luminance: u32,
}
303
/// HDR10 Content Light Level Information, per CTA-861.3 / HEVC SEI 144
/// (`content_light_level_info`) / AV1 Metadata OBU `METADATA_TYPE_HDR_CLL`.
///
/// In the MP4 `clli` box this is encoded as 2 × u16 BE: a 4-byte payload.
///
/// # Field-name contract
///
/// The field names are load-bearing for Squad-21; do not rename anything
/// without coordinating.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ContentLightLevel {
    /// Maximum Content Light Level: peak luminance of the brightest pixel
    /// anywhere in the stream, in cd/m² (integer nits).
    pub max_cll: u16,
    /// Maximum Frame-Average Light Level: peak per-frame average luminance,
    /// in cd/m² (integer nits).
    pub max_fall: u16,
}