codec/tonemap.rs
1//! HDR → SDR tonemap.
2//!
3//! Pipeline: 10-bit BT.2020 PQ/HLG Y'CbCr → linear scene-referred RGB →
4//! BT.709 gamut → Hable filmic curve → BT.709 gamma → 8-bit BT.709
5//! limited-range Y'CbCr.
6//!
7//! Single-output policy: every HDR upload gets tonemapped to SDR at
8//! transcode time and the encoded ABR ladder is 8-bit BT.709. Every
9//! viewer sees a correctly-mapped image regardless of display capability.
10//! HDR-fidelity-for-HDR-viewers is a future dual-rendition path that
11//! will reuse the same primitives for the SDR rungs.
12//!
13//! Reference standards:
14//! - ITU-R BT.2020 (matrix + primaries)
15//! - SMPTE ST.2084 (PQ EOTF)
16//! - ARIB STD-B67 (HLG EOTF)
17//! - ITU-R BT.709 (output matrix + transfer)
18//! - "Filmic Tonemapping for Real-time Rendering" — John Hable, 2010
19//!
20//! Implementation is pure-Rust scalar f32. AVX2 vectorisation is a
21//! follow-up — the kernel is hot (per-pixel two matrix multiplies + one
22//! transcendental on each channel + the filmic curve), but it's
23//! single-threaded with a per-frame fan-out, so the per-thread budget
24//! lands well inside a 1080p60 real-time window even scalar.
25
26use anyhow::{Result, bail};
27use bytes::Bytes;
28
29use crate::frame::{ColorSpace, PixelFormat, TransferFn, VideoFrame};
30
31// ── transfer (EOTF inverse: encoded → scene linear) ───────────────────
32
33/// PQ inverse EOTF (SMPTE ST.2084).
34///
35/// Returns scene-linear in units where `1.0 = 100 cd/m² SDR diffuse white`,
36/// so `100.0 = 10,000 nits PQ peak`. Tonemap operates in the same scene-
37/// linear frame.
38#[inline(always)]
39fn pq_to_linear(n: f32) -> f32 {
40 const M1_INV: f32 = 1.0 / 0.159_301_76;
41 const M2_INV: f32 = 1.0 / 78.84375;
42 const C1: f32 = 0.8359375;
43 const C2: f32 = 18.851_563;
44 const C3: f32 = 18.6875;
45 let np = n.max(0.0).powf(M2_INV);
46 let num = (np - C1).max(0.0);
47 let den = C2 - C3 * np;
48 if den <= 0.0 {
49 return 0.0;
50 }
51 let lin01 = (num / den).powf(M1_INV); // 0..1, 1.0 = 10,000 nits
52 lin01 * 100.0 // rescale so 1.0 = SDR diffuse white (~100 nits)
53}
54
55/// HLG inverse OETF (ARIB STD-B67) followed by the SDR-target OOTF
56/// (γ=1.2) — the Apple-published / BBC-R&D recipe for HLG → BT.709
57/// conversion.
58///
59/// The OOTF is the load-bearing piece: HLG signals are SCENE-referred
60/// (the encoded value is the camera's view of light, not a display
61/// luminance). Without applying the OOTF, the tonemap operates on
62/// raw scene values and midtones land in the wrong place — iPhone
63/// HLG content famously reads as ~1 stop too bright on every
64/// generic HDR-passthrough or naive-tonemap pipeline because their
65/// camera assumes Apple's downstream tonemapper handles the
66/// scene→display transform.
67///
68/// Apple's documented gamma for SDR target: γ=1.2 (per "HDR Editing
69/// Best Practices in iOS / macOS", WWDC 2020 + ARIB STD-B67 §3.3).
70/// We apply per-channel for simplicity (the "constant luminance"
71/// version uses Y_s = max(R,G,B) as the base; per-channel is what
72/// most consumer HLG decoders ship and is accurate enough for
73/// social-media playback).
74///
75/// Returns scene-linear-OOTF'd in the same 1.0=100-nit-SDR-white
76/// frame as PQ so downstream tonemap math is uniform.
77#[inline(always)]
78fn hlg_to_linear(e: f32) -> f32 {
79 const A: f32 = 0.17883277;
80 const B: f32 = 1.0 - 4.0 * A;
81 // c = 0.5 - a * ln(4a). Hardcoded so we don't pay for a runtime ln().
82 const C: f32 = 0.559_910_7;
83 /// SDR-target system gamma per BBC R&D / Apple HLG → BT.709 spec.
84 const HLG_OOTF_GAMMA: f32 = 1.2;
85
86 let e = e.max(0.0);
87 // Step 1: inverse OETF — encoded HLG value → scene-linear (0..1
88 // where 1.0 is the HLG peak, typically interpreted as 1000 nits
89 // on a reference display).
90 let scene_lin = if e <= 0.5 {
91 (e * e) / 3.0
92 } else {
93 ((((e - C) / A).exp()) + B) / 12.0
94 };
95 // Step 2: OOTF — scene-linear → display-linear with γ=1.2 for
96 // SDR target. Per-channel approximation. Naturally compresses
97 // the iPhone "1-stop bright" overshoot since values >1 raised
98 // to 1.2 expand and then get clipped by Hable's max_white.
99 let display_lin = scene_lin.powf(HLG_OOTF_GAMMA);
100 // Step 3: rescale to the 1.0=100-nit-SDR-white frame the tonemap
101 // expects. HLG peak (1.0 → after OOTF still 1.0) maps to 10.0
102 // here, same as PQ's 1000-nit reference.
103 display_lin * 10.0
104}
105
106#[inline(always)]
107fn dispatch_eotf(transfer: TransferFn, encoded: f32) -> f32 {
108 match transfer {
109 TransferFn::St2084 => pq_to_linear(encoded),
110 TransferFn::AribStdB67 => hlg_to_linear(encoded),
111 // Defensive: a non-HDR transfer reaching this path is a caller
112 // bug — we've gated dispatch on `is_hdr` upstream. Treat as
113 // identity rather than panicking so partial bugs don't take
114 // out playback.
115 _ => encoded.max(0.0),
116 }
117}
118
119// ── tonemap (Hable filmic) ────────────────────────────────────────────
120
121/// Uncharted 2 partial — the building block of Hable's filmic curve.
122/// Numbers are Hable's published coefficients verbatim.
123#[inline(always)]
124fn hable_partial(x: f32) -> f32 {
125 const A: f32 = 0.15;
126 const B: f32 = 0.50;
127 const C: f32 = 0.10;
128 const D: f32 = 0.20;
129 const E: f32 = 0.02;
130 const F: f32 = 0.30;
131 ((x * (A * x + C * B) + D * E) / (x * (A * x + B) + D * F)) - E / F
132}
133
134/// Hable filmic tonemap. Input is scene-linear (1.0 = SDR diffuse
135/// white reference). `max_white` is the scene-linear value that should
136/// map to display white (1.0 SDR-linear out) — typically the source's
137/// MaxCLL or the master display max luminance, divided by 100.
138#[inline(always)]
139fn hable_tonemap(x: f32, max_white: f32) -> f32 {
140 // 2.0 exposure bias (Hable's recommended default — gives the toe
141 // a film-stock feel and lifts midtones slightly).
142 const EXPOSURE: f32 = 2.0;
143 let curr = hable_partial(x * EXPOSURE);
144 let scale = 1.0 / hable_partial(max_white * EXPOSURE);
145 (curr * scale).clamp(0.0, 1.0)
146}
147
148// ── BT.709 OETF (linear → gamma-encoded) ──────────────────────────────
149
150#[inline(always)]
151fn bt709_oetf(l: f32) -> f32 {
152 let l = l.clamp(0.0, 1.0);
153 if l < 0.018 {
154 4.5 * l
155 } else {
156 1.099 * l.powf(0.45) - 0.099
157 }
158}
159
160// ── matrix coefficients ───────────────────────────────────────────────
161
162/// BT.2020 NCL Y'CbCr → R'G'B' (still in encoded transfer function
163/// domain). Cb / Cr inputs are normalised to [-0.5, 0.5].
164#[inline(always)]
165fn yuv2020ncl_to_rgb(y: f32, cb: f32, cr: f32) -> (f32, f32, f32) {
166 // Kr = 0.2627, Kb = 0.0593, Kg = 1 - Kr - Kb = 0.6780.
167 let r = y + 1.4746 * cr;
168 let g = y - 0.16455 * cb - 0.57135 * cr;
169 let b = y + 1.8814 * cb;
170 (r, g, b)
171}
172
173/// Linear RGB BT.2020 → Linear RGB BT.709 (D65 white-point matched).
174/// Negative coefficients are intentional — gamut conversion can produce
175/// out-of-gamut values which we clip on the OETF input side.
176#[inline(always)]
177fn rgb2020_to_rgb709_linear(r: f32, g: f32, b: f32) -> (f32, f32, f32) {
178 let r_out = 1.66049 * r - 0.58764 * g - 0.07285 * b;
179 let g_out = -0.12455 * r + 1.13290 * g - 0.01006 * b;
180 let b_out = -0.01815 * r - 0.10058 * g + 1.11873 * b;
181 (r_out, g_out, b_out)
182}
183
184/// R'G'B' BT.709 (gamma) → Y'CbCr 8-bit limited range.
185/// Output triplet is (y, cb, cr) ∈ [16..235], [16..240], [16..240].
186#[inline(always)]
187fn rgb709_to_yuv709_limited(r: f32, g: f32, b: f32) -> (u8, u8, u8) {
188 // Kr = 0.2126, Kb = 0.0722.
189 let y = 0.2126 * r + 0.7152 * g + 0.0722 * b;
190 let cb = (b - y) / 1.8556;
191 let cr = (r - y) / 1.5748;
192 let y8 = (y * 219.0 + 16.0).round().clamp(16.0, 235.0) as u8;
193 let cb8 = (cb * 224.0 + 128.0).round().clamp(16.0, 240.0) as u8;
194 let cr8 = (cr * 224.0 + 128.0).round().clamp(16.0, 240.0) as u8;
195 (y8, cb8, cr8)
196}
197
198// ── chroma desub: 10-bit Y'CbCr code → normalised float ───────────────
199
200const Y_BLACK_10: f32 = 64.0; // 16 << 2
201const Y_RANGE_10: f32 = 876.0; // (235 - 16) << 2
202const C_NEUTRAL_10: f32 = 512.0; // 128 << 2
203const C_HALFRANGE_10: f32 = 448.0; // 224/2 << 2
204
205#[inline(always)]
206fn y10_to_normalised(y: u16) -> f32 {
207 (y as f32 - Y_BLACK_10) / Y_RANGE_10
208}
209
210#[inline(always)]
211fn c10_to_normalised(c: u16) -> f32 {
212 (c as f32 - C_NEUTRAL_10) / (C_HALFRANGE_10 * 2.0)
213}
214
215// ── public entry ──────────────────────────────────────────────────────
216
217/// Default scene-linear white point when the source carries no
218/// mastering display metadata. Picked to match a typical HDR10 master
219/// at 1000-nit peak — most consumer HDR content. Sources tagged with
220/// `mastering_display.max_luminance` use that exact value instead.
221const DEFAULT_MAX_WHITE_NITS: f32 = 1000.0;
222
223/// HDR → SDR tonemap.
224///
225/// Input must be `Yuv420p10le` (BT.2020 NCL is assumed; CL would need
226/// a different matrix). Output is `Yuv420p` (8-bit, BT.709 limited).
227///
228/// `transfer` selects the EOTF (PQ vs HLG). `max_white_nits` is the
229/// scene-linear white point used to scale the Hable curve — pass the
230/// source's mastering-display `max_luminance` (in cd/m²) when present;
231/// otherwise `DEFAULT_MAX_WHITE_NITS`.
232///
233/// Implementation: per-pixel Y conversion at full resolution; chroma
234/// downsampled by averaging the 2×2 luma-area RGB output back into a
235/// single (cb, cr) per chroma sample. This is more expensive than a
236/// "tonemap once per chroma block" approach but avoids the hue shifts
237/// that can show up at high luminance on subsampled-tonemap output.
238pub fn tonemap_yuv420p10le_bt2020_to_yuv420p_bt709(
239 src: &VideoFrame,
240 transfer: TransferFn,
241 max_white_nits: Option<f32>,
242) -> Result<VideoFrame> {
243 if !matches!(src.format, PixelFormat::Yuv420p10le) {
244 bail!(
245 "tonemap_yuv420p10le_bt2020_to_yuv420p_bt709 expects Yuv420p10le; got {:?}",
246 src.format
247 );
248 }
249 let w = src.width as usize;
250 let h = src.height as usize;
251 if w == 0 || h == 0 || (w & 1) != 0 || (h & 1) != 0 {
252 bail!("tonemap requires non-zero even dimensions; got {}x{}", w, h);
253 }
254
255 let max_white = (max_white_nits.unwrap_or(DEFAULT_MAX_WHITE_NITS) / 100.0).max(1.0);
256
257 let y_plane_bytes = w * h * 2;
258 let c_plane_bytes = (w / 2) * (h / 2) * 2;
259 if src.data.len() < y_plane_bytes + 2 * c_plane_bytes {
260 bail!(
261 "Yuv420p10le frame too small for {}x{}: need {} bytes, got {}",
262 w,
263 h,
264 y_plane_bytes + 2 * c_plane_bytes,
265 src.data.len()
266 );
267 }
268
269 // Reinterpret the byte slice as u16 LE planes. Endianness assumed
270 // little — every host we ship to is x86_64 / aarch64 LE; a future
271 // BE platform would need byteswap helpers here.
272 let bytes = src.data.as_ref();
273 let y_plane: &[u16] =
274 unsafe { std::slice::from_raw_parts(bytes.as_ptr() as *const u16, w * h) };
275 let cb_plane: &[u16] = unsafe {
276 std::slice::from_raw_parts(
277 bytes.as_ptr().add(y_plane_bytes) as *const u16,
278 (w / 2) * (h / 2),
279 )
280 };
281 let cr_plane: &[u16] = unsafe {
282 std::slice::from_raw_parts(
283 bytes.as_ptr().add(y_plane_bytes + c_plane_bytes) as *const u16,
284 (w / 2) * (h / 2),
285 )
286 };
287
288 let mut out_y = vec![0u8; w * h];
289 let mut out_cb = vec![0u8; (w / 2) * (h / 2)];
290 let mut out_cr = vec![0u8; (w / 2) * (h / 2)];
291
292 // Walk in 2x2 blocks so we can downsample the chroma in lockstep.
293 for by in 0..(h / 2) {
294 for bx in 0..(w / 2) {
295 let cb_n = c10_to_normalised(cb_plane[by * (w / 2) + bx]);
296 let cr_n = c10_to_normalised(cr_plane[by * (w / 2) + bx]);
297
298 let mut acc_cb = 0.0_f32;
299 let mut acc_cr = 0.0_f32;
300
301 for dy in 0..2 {
302 for dx in 0..2 {
303 let yi = by * 2 + dy;
304 let xi = bx * 2 + dx;
305 let y_n = y10_to_normalised(y_plane[yi * w + xi]);
306
307 // 1. BT.2020 NCL Y'CbCr → R'G'B' (still gamma).
308 let (r_g, g_g, b_g) = yuv2020ncl_to_rgb(y_n, cb_n, cr_n);
309
310 // 2. EOTF inverse: gamma → scene linear (1.0 = SDR diffuse).
311 let r_lin = dispatch_eotf(transfer, r_g);
312 let g_lin = dispatch_eotf(transfer, g_g);
313 let b_lin = dispatch_eotf(transfer, b_g);
314
315 // 3. Gamut convert: linear BT.2020 → linear BT.709.
316 let (r709, g709, b709) = rgb2020_to_rgb709_linear(r_lin, g_lin, b_lin);
317
318 // 4. Tonemap each channel (per-channel preserves
319 // saturation better than luminance-only at the
320 // cost of slightly less perceptually uniform
321 // response — Hable's published recipe uses
322 // per-channel).
323 let r_tm = hable_tonemap(r709, max_white);
324 let g_tm = hable_tonemap(g709, max_white);
325 let b_tm = hable_tonemap(b709, max_white);
326
327 // 5. OETF: linear → BT.709 gamma encoded.
328 let r_o = bt709_oetf(r_tm);
329 let g_o = bt709_oetf(g_tm);
330 let b_o = bt709_oetf(b_tm);
331
332 // 6. RGB → Y'CbCr 8-bit BT.709 limited.
333 let (y8, cb8, cr8) = rgb709_to_yuv709_limited(r_o, g_o, b_o);
334 out_y[yi * w + xi] = y8;
335 acc_cb += cb8 as f32;
336 acc_cr += cr8 as f32;
337 }
338 }
339
340 // Downsample chroma: average the 4 per-pixel chroma values
341 // back to one sample per 2x2 block (4:2:0 layout).
342 out_cb[by * (w / 2) + bx] = (acc_cb * 0.25).round() as u8;
343 out_cr[by * (w / 2) + bx] = (acc_cr * 0.25).round() as u8;
344 }
345 }
346
347 let mut out = Vec::with_capacity(w * h + 2 * (w / 2) * (h / 2));
348 out.extend_from_slice(&out_y);
349 out.extend_from_slice(&out_cb);
350 out.extend_from_slice(&out_cr);
351
352 Ok(VideoFrame::new(
353 Bytes::from(out),
354 src.width,
355 src.height,
356 PixelFormat::Yuv420p,
357 ColorSpace::Bt709,
358 src.pts,
359 ))
360}
361
362#[cfg(test)]
363mod tests {
364 use super::*;
365
366 fn make_solid_yuv420p10le(w: u32, h: u32, y10: u16, cb10: u16, cr10: u16) -> VideoFrame {
367 let mut bytes = Vec::with_capacity((w * h * 2 + 2 * (w / 2) * (h / 2) * 2) as usize);
368 for _ in 0..(w * h) {
369 bytes.extend_from_slice(&y10.to_le_bytes());
370 }
371 for _ in 0..((w / 2) * (h / 2)) {
372 bytes.extend_from_slice(&cb10.to_le_bytes());
373 }
374 for _ in 0..((w / 2) * (h / 2)) {
375 bytes.extend_from_slice(&cr10.to_le_bytes());
376 }
377 VideoFrame::new(
378 Bytes::from(bytes),
379 w,
380 h,
381 PixelFormat::Yuv420p10le,
382 ColorSpace::Bt2020,
383 0,
384 )
385 }
386
387 #[test]
388 fn tonemap_solid_pq_black_yields_sdr_black() {
389 // 10-bit limited-range black: Y=64, Cb=Cr=512.
390 let src = make_solid_yuv420p10le(16, 16, 64, 512, 512);
391 let out = tonemap_yuv420p10le_bt2020_to_yuv420p_bt709(&src, TransferFn::St2084, None)
392 .expect("tonemap");
393 assert_eq!(out.format, PixelFormat::Yuv420p);
394 assert_eq!(out.color_space, ColorSpace::Bt709);
395 let y = out.data[0];
396 let cb = out.data[16 * 16];
397 let cr = out.data[16 * 16 + 8 * 8];
398 // Black should map to BT.709 limited black: Y≈16, Cb≈Cr≈128.
399 assert!((y as i32 - 16).abs() <= 1, "Y near 16, got {}", y);
400 assert!((cb as i32 - 128).abs() <= 1, "Cb near 128, got {}", cb);
401 assert!((cr as i32 - 128).abs() <= 1, "Cr near 128, got {}", cr);
402 }
403
404 #[test]
405 fn tonemap_solid_pq_white_clipped_under_one() {
406 // 10-bit PQ peak: Y=940 (limited-range white).
407 let src = make_solid_yuv420p10le(16, 16, 940, 512, 512);
408 let out =
409 tonemap_yuv420p10le_bt2020_to_yuv420p_bt709(&src, TransferFn::St2084, Some(1000.0))
410 .expect("tonemap");
411 let y = out.data[0];
412 // PQ "white" code corresponds to 10,000 nits absolute. At
413 // max_white=1000 nits, that's 10x overrange — Hable curve
414 // saturates near 1.0, OETF gives ~235 limited-range. Allow
415 // a small numerical margin.
416 assert!(y >= 200, "PQ peak should map near limited-white; got {}", y);
417 assert!(y <= 235, "limited-range upper bound 235, got {}", y);
418 }
419
420 #[test]
421 fn tonemap_solid_pq_midgrey_yields_lifted_midgrey() {
422 // PQ encoded ~50% (midpoint code 0.5 → ~92 nits → ~1.0 in
423 // SDR-linear-1.0=100-nits frame). Hable with exposure=2 lifts
424 // this above linear 0.5 → BT.709 OETF gives a code well above
425 // the limited-range mid (Y≈126).
426 let y10 = ((0.5 * Y_RANGE_10) + Y_BLACK_10) as u16;
427 let src = make_solid_yuv420p10le(16, 16, y10, 512, 512);
428 let out = tonemap_yuv420p10le_bt2020_to_yuv420p_bt709(&src, TransferFn::St2084, None)
429 .expect("tonemap");
430 let y = out.data[0];
431 assert!(
432 (130..=210).contains(&y),
433 "PQ ~92 nits should land in upper-mid limited range, got {}",
434 y
435 );
436 }
437
438 #[test]
439 fn tonemap_hlg_path_runs() {
440 // Smoke: HLG black should map near limited-range black.
441 let src = make_solid_yuv420p10le(8, 8, 64, 512, 512);
442 let out = tonemap_yuv420p10le_bt2020_to_yuv420p_bt709(&src, TransferFn::AribStdB67, None)
443 .expect("tonemap HLG");
444 assert!((out.data[0] as i32 - 16).abs() <= 1);
445 }
446
447 #[test]
448 fn tonemap_rejects_wrong_format() {
449 let src = VideoFrame::new(
450 Bytes::from(vec![0u8; 96]),
451 8,
452 8,
453 PixelFormat::Yuv420p,
454 ColorSpace::Bt709,
455 0,
456 );
457 let err = tonemap_yuv420p10le_bt2020_to_yuv420p_bt709(&src, TransferFn::St2084, None)
458 .expect_err("must reject 8-bit input");
459 assert!(format!("{:?}", err).contains("Yuv420p10le"));
460 }
461
462 #[test]
463 fn pq_eotf_monotonic() {
464 // Sanity: EOTF must be monotonically increasing.
465 let mut last = -1.0;
466 for i in 0..=100 {
467 let v = pq_to_linear(i as f32 / 100.0);
468 assert!(v >= last, "non-monotonic at {}: {} < {}", i, v, last);
469 last = v;
470 }
471 }
472
473 #[test]
474 fn hable_tonemap_clamps_to_unit() {
475 // Inputs above max_white should clamp to <= 1.0.
476 for x in [0.0, 1.0, 5.0, 50.0, 500.0_f32] {
477 let v = hable_tonemap(x, 10.0);
478 assert!(v >= 0.0 && v <= 1.0, "out of range at x={}: {}", x, v);
479 }
480 }
481
482 #[test]
483 fn bt709_oetf_inverts_neutral_grey() {
484 // Reference values from ITU-R BT.709 §1.2:
485 // E' = 4.5 * E for 0 ≤ E < 0.018
486 // E' = 1.099 * E^0.45 - 0.099 for 0.018 ≤ E ≤ 1
487 // At E = 0.5: 1.099 * 0.5^0.45 - 0.099 ≈ 0.7055.
488 // At E = 1.0: 1.099 * 1.0 - 0.099 = 1.000.
489 // (Earlier this test asserted 0.7398, which is the sRGB EOTF
490 // value — different transfer function, different formula. The
491 // BT.709 number is materially lower at mid-grey.)
492 assert!((bt709_oetf(0.5) - 0.7055).abs() < 0.01);
493 assert!((bt709_oetf(1.0) - 1.0).abs() < 0.01);
494 }
495}