codec/colorspace/downsample_444.rs
1// =============================================================================
2// 4:4:4 → 4:2:0 chroma downsample (Squad-31, roadmap #6).
3// =============================================================================
4//
5// ProRes 4444 (and other 4:4:4 sources) decode at full chroma resolution —
6// Cb / Cr planes match the luma plane in both dimensions. The encoder side
7// (rav1e + HW backends) only accepts 4:2:0, where chroma is half-resolution
8// in both axes. This module bridges the gap with a 2×2 box-average filter:
9// for each 2×2 block of source chroma, output one chroma sample equal to
10// the rounded mean. Y plane is unchanged (full-resolution luma in both
11// formats — 4:4:4 and 4:2:0 differ only in chroma layout).
12//
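// Filter choice: 2×2 box average, the simplest filter for 4:4:4 → 4:2:0.
// For each output sample at (cx, cy), the input samples are (2*cx, 2*cy),
// (2*cx+1, 2*cy), (2*cx, 2*cy+1) and (2*cx+1, 2*cy+1). The output is
// `(s00 + s01 + s10 + s11 + 2) >> 2`: adding half the divisor before the
// truncating shift rounds to nearest, with ties rounding up.
//
// Chroma siting: the target is MPEG-2 left-aligned siting. NOTE(review): an
// unweighted 2×2 mean is centred on its block, i.e. half a luma sample to
// the right of the left-aligned position — confirm that this offset is
// acceptable, or that the encoder is told the chroma is centre-sited.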
18//
19// Higher-quality alternatives (6-tap separable FIR per BT.601/709 H.131,
20// or a Lanczos-2 horizontal+vertical pair) are deferred to a follow-up;
21// they cost ~10× the cycles for ~0.3 dB chroma PSNR improvement, which
22// most consumer transcoders consider not worth it. The box average matches
23// libswscale's default 4:4:4 → 4:2:0 path when no scaler is requested.
24//
// Odd-dimension policy: when the source width or height is odd, the output
// dimensions round up (`(src + 1) / 2`), so the rightmost column / bottom
// row of 2×2 blocks covers only a single source column / row. We **clamp**
// the out-of-range coordinate, so the missing neighbour reuses the in-bounds
// sample. Clamping and edge replication are identical for a 1-pixel
// boundary; we pick clamping because it's the simplest scalar
// implementation and matches what libswscale does.
31//
32// Alpha plane (Yuva444p10le): the 4:2:0 encoder format has no alpha. We
33// **drop** alpha with a single warn-log (in pipeline integration). AV1
34// has alpha support in some experimental profiles but rav1e 0.7 doesn't
35// expose it, and pre-compositing onto a black background changes pixel
36// values — keying / compositing on the source side would have already
37// happened. Documented in SUPPORTED.md.
38
39use anyhow::{Result, bail};
40use bytes::Bytes;
41
42use crate::frame::{PixelFormat, VideoFrame};
43
/// Converts planar 8-bit 4:4:4 (`Yuv444p`) samples to packed 4:2:0
/// (`Yuv420p`) by box-averaging chroma.
///
/// The luma plane `y` is appended to the output unchanged. Each output
/// chroma sample is the rounded mean `(a + b + c + d + 2) >> 2` of one 2×2
/// block of the corresponding source plane.
///
/// Chroma planes are `width.div_ceil(2)` × `height.div_ceil(2)` samples.
/// When a dimension is odd, the last block's out-of-range column / row index
/// is clamped to the final valid one, so the edge sample is counted twice.
///
/// `y`, `cb` and `cr` are row-major planes with a stride of `width`; each is
/// expected to hold `width * height` samples (checked in debug builds only).
///
/// Returns the buffer laid out as `Y || Cb || Cr`.
pub fn downsample_chroma_444_to_420(
    y: &[u8],
    cb: &[u8],
    cr: &[u8],
    width: usize,
    height: usize,
) -> Vec<u8> {
    let luma_len = width * height;
    debug_assert_eq!(y.len(), luma_len, "Y plane size");
    debug_assert_eq!(cb.len(), luma_len, "Cb plane size (4:4:4)");
    debug_assert_eq!(cr.len(), luma_len, "Cr plane size (4:4:4)");

    let chroma_w = width.div_ceil(2);
    let chroma_h = height.div_ceil(2);

    let mut packed = Vec::with_capacity(luma_len + 2 * chroma_w * chroma_h);

    // Luma has the same resolution in both layouts, so it passes through.
    packed.extend_from_slice(y);

    for src in [cb, cr] {
        for row in 0..chroma_h {
            // Offsets of the two source rows feeding this output row; the
            // second one is clamped so an odd height reuses the last row.
            let top = 2 * row * width;
            let bottom = (2 * row + 1).min(height - 1) * width;

            packed.extend((0..chroma_w).map(|col| {
                let left = 2 * col;
                // Clamped so an odd width reuses the last column.
                let right = (left + 1).min(width - 1);
                // Four 8-bit samples plus the rounding bias peak at 1022,
                // which fits in u16.
                let sum = u16::from(src[top + left])
                    + u16::from(src[top + right])
                    + u16::from(src[bottom + left])
                    + u16::from(src[bottom + right]);
                ((sum + 2) >> 2) as u8
            }));
        }
    }

    packed
}
96
/// High-bit-depth counterpart of the 8-bit downsampler: planar 4:4:4 `u16`
/// samples (`Yuv444p10le`) in, packed little-endian 4:2:0 bytes
/// (`Yuv420p10le`) out.
///
/// Every output sample — the untouched luma plane first, then the averaged
/// Cb and Cr planes — is serialised as two little-endian bytes. Chroma uses
/// the same 2×2 rounded mean `(a + b + c + d + 2) >> 2`, with the same
/// edge clamping for odd dimensions, as the 8-bit path.
///
/// `y`, `cb` and `cr` are row-major planes with a stride of `width`; each is
/// expected to hold `width * height` samples (checked in debug builds only).
///
/// The sum is accumulated in `u32`, so it cannot overflow for any `u16`
/// input, not just 10-bit values (4 × 65535 + 2 is far below `u32::MAX`).
pub fn downsample_chroma_444_to_420_10bit(
    y: &[u16],
    cb: &[u16],
    cr: &[u16],
    width: usize,
    height: usize,
) -> Vec<u8> {
    let luma_samples = width * height;
    debug_assert_eq!(y.len(), luma_samples, "Y plane samples");
    debug_assert_eq!(cb.len(), luma_samples, "Cb plane samples (4:4:4)");
    debug_assert_eq!(cr.len(), luma_samples, "Cr plane samples (4:4:4)");

    let half_w = width.div_ceil(2);
    let half_h = height.div_ceil(2);

    // Two bytes per emitted sample.
    let mut bytes = Vec::with_capacity((luma_samples + 2 * half_w * half_h) * 2);

    // Luma is full resolution in both layouts; only its encoding changes
    // (u16 values -> LE byte pairs).
    bytes.extend(y.iter().flat_map(|sample| sample.to_le_bytes()));

    for chroma in [cb, cr] {
        for out_row in 0..half_h {
            // Second source row is clamped so an odd height reuses the last row.
            let upper = 2 * out_row * width;
            let lower = (2 * out_row + 1).min(height - 1) * width;

            for out_col in 0..half_w {
                let near = 2 * out_col;
                // Clamped so an odd width reuses the last column.
                let far = (near + 1).min(width - 1);

                let total: u32 = [upper + near, upper + far, lower + near, lower + far]
                    .iter()
                    .map(|&idx| u32::from(chroma[idx]))
                    .sum();
                let mean = ((total + 2) >> 2) as u16;
                bytes.extend_from_slice(&mean.to_le_bytes());
            }
        }
    }

    bytes
}
145
146/// High-level frame-shaped wrapper. Takes a `Yuv444p10le` /
147/// `Yuva444p10le` `VideoFrame` and returns a `Yuv420p10le`
148/// `VideoFrame` ready for the 10-bit AV1 encoder. Alpha plane (if
149/// present) is **dropped** with a warn-log — see module docstring for
150/// rationale. 8-bit equivalent (`Yuv444p` → `Yuv420p`) follows the
151/// same pattern, plumbed through `downsample_chroma_444_to_420`.
152///
153/// Errors if the source format is not 4:4:4.
154pub fn downsample_444_to_420_frame(frame: &VideoFrame) -> Result<VideoFrame> {
155 let w = frame.width as usize;
156 let h = frame.height as usize;
157 if w == 0 || h == 0 {
158 bail!("zero-dimension frame");
159 }
160
161 match frame.format {
162 PixelFormat::Yuv444p => {
163 let plane = w * h;
164 if frame.data.len() < 3 * plane {
165 bail!(
166 "Yuv444p frame data too short for {}x{}: {} bytes",
167 w,
168 h,
169 frame.data.len()
170 );
171 }
172 let y = &frame.data[..plane];
173 let cb = &frame.data[plane..2 * plane];
174 let cr = &frame.data[2 * plane..3 * plane];
175 let out = downsample_chroma_444_to_420(y, cb, cr, w, h);
176 Ok(VideoFrame::new(
177 Bytes::from(out),
178 frame.width,
179 frame.height,
180 PixelFormat::Yuv420p,
181 frame.color_space,
182 frame.pts,
183 ))
184 }
185 PixelFormat::Yuv444p10le | PixelFormat::Yuva444p10le => {
186 let plane = w * h;
187 // 10-bit (or 16-bit alpha) is 2 bytes/sample. Y/Cb/Cr always
188 // 10-bit, alpha (if present) is 16-bit, but layout is per-
189 // plane LE u16 either way. We only consume the first three
190 // planes; alpha (plane 4) is dropped on the floor.
191 let needed = if frame.format == PixelFormat::Yuva444p10le {
192 4 * plane * 2
193 } else {
194 3 * plane * 2
195 };
196 if frame.data.len() < needed {
197 bail!(
198 "{:?} frame data too short for {}x{}: {} bytes (need {})",
199 frame.format,
200 w,
201 h,
202 frame.data.len(),
203 needed
204 );
205 }
206 // Decode three u16 LE planes from the source bytes.
207 let y = super::read_u16le(&frame.data[..plane * 2]);
208 let cb = super::read_u16le(&frame.data[plane * 2..2 * plane * 2]);
209 let cr = super::read_u16le(&frame.data[2 * plane * 2..3 * plane * 2]);
210
211 if frame.format == PixelFormat::Yuva444p10le {
212 tracing::warn!(
213 pts = frame.pts,
214 "dropping alpha plane on 4:4:4→4:2:0 downsample \
215 (rav1e 0.7 has no alpha; pipeline target is Yuv420p10le)"
216 );
217 }
218
219 let out = downsample_chroma_444_to_420_10bit(&y, &cb, &cr, w, h);
220 Ok(VideoFrame::new(
221 Bytes::from(out),
222 frame.width,
223 frame.height,
224 PixelFormat::Yuv420p10le,
225 frame.color_space,
226 frame.pts,
227 ))
228 }
229 other => bail!(
230 "downsample_444_to_420_frame: expected 4:4:4 input, got {:?}",
231 other
232 ),
233 }
234}