Skip to main content

codec/
tonemap.rs

1//! HDR → SDR tonemap.
2//!
3//! Pipeline: 10-bit BT.2020 PQ/HLG Y'CbCr → linear scene-referred RGB →
4//! BT.709 gamut → Hable filmic curve → BT.709 gamma → 8-bit BT.709
5//! limited-range Y'CbCr.
6//!
7//! Single-output policy: every HDR upload gets tonemapped to SDR at
8//! transcode time and the encoded ABR ladder is 8-bit BT.709. Every
9//! viewer sees a correctly-mapped image regardless of display capability.
10//! HDR-fidelity-for-HDR-viewers is a future dual-rendition path that
11//! will reuse the same primitives for the SDR rungs.
12//!
13//! Reference standards:
14//!   - ITU-R BT.2020 (matrix + primaries)
15//!   - SMPTE ST.2084 (PQ EOTF)
16//!   - ARIB STD-B67 (HLG EOTF)
17//!   - ITU-R BT.709 (output matrix + transfer)
18//!   - "Filmic Tonemapping for Real-time Rendering" — John Hable, 2010
19//!
20//! Implementation is pure-Rust scalar f32. AVX2 vectorisation is a
21//! follow-up — the kernel is hot (per-pixel two matrix multiplies + one
22//! transcendental on each channel + the filmic curve), but it's
23//! single-threaded with a per-frame fan-out, so the per-thread budget
24//! lands well inside a 1080p60 real-time window even scalar.
25
26use anyhow::{Result, bail};
27use bytes::Bytes;
28
29use crate::frame::{ColorSpace, PixelFormat, TransferFn, VideoFrame};
30
31// ── transfer (EOTF inverse: encoded → scene linear) ───────────────────
32
33/// PQ inverse EOTF (SMPTE ST.2084).
34///
35/// Returns scene-linear in units where `1.0 = 100 cd/m² SDR diffuse white`,
36/// so `100.0 = 10,000 nits PQ peak`. Tonemap operates in the same scene-
37/// linear frame.
38#[inline(always)]
39fn pq_to_linear(n: f32) -> f32 {
40    const M1_INV: f32 = 1.0 / 0.159_301_76;
41    const M2_INV: f32 = 1.0 / 78.84375;
42    const C1: f32 = 0.8359375;
43    const C2: f32 = 18.851_563;
44    const C3: f32 = 18.6875;
45    let np = n.max(0.0).powf(M2_INV);
46    let num = (np - C1).max(0.0);
47    let den = C2 - C3 * np;
48    if den <= 0.0 {
49        return 0.0;
50    }
51    let lin01 = (num / den).powf(M1_INV); // 0..1, 1.0 = 10,000 nits
52    lin01 * 100.0 // rescale so 1.0 = SDR diffuse white (~100 nits)
53}
54
55/// HLG inverse OETF (ARIB STD-B67) followed by the SDR-target OOTF
56/// (γ=1.2) — the Apple-published / BBC-R&D recipe for HLG → BT.709
57/// conversion.
58///
59/// The OOTF is the load-bearing piece: HLG signals are SCENE-referred
60/// (the encoded value is the camera's view of light, not a display
61/// luminance). Without applying the OOTF, the tonemap operates on
62/// raw scene values and midtones land in the wrong place — iPhone
63/// HLG content famously reads as ~1 stop too bright on every
64/// generic HDR-passthrough or naive-tonemap pipeline because their
65/// camera assumes Apple's downstream tonemapper handles the
66/// scene→display transform.
67///
68/// Apple's documented gamma for SDR target: γ=1.2 (per "HDR Editing
69/// Best Practices in iOS / macOS", WWDC 2020 + ARIB STD-B67 §3.3).
70/// We apply per-channel for simplicity (the "constant luminance"
71/// version uses Y_s = max(R,G,B) as the base; per-channel is what
72/// most consumer HLG decoders ship and is accurate enough for
73/// social-media playback).
74///
75/// Returns scene-linear-OOTF'd in the same 1.0=100-nit-SDR-white
76/// frame as PQ so downstream tonemap math is uniform.
77#[inline(always)]
78fn hlg_to_linear(e: f32) -> f32 {
79    const A: f32 = 0.17883277;
80    const B: f32 = 1.0 - 4.0 * A;
81    // c = 0.5 - a * ln(4a). Hardcoded so we don't pay for a runtime ln().
82    const C: f32 = 0.559_910_7;
83    /// SDR-target system gamma per BBC R&D / Apple HLG → BT.709 spec.
84    const HLG_OOTF_GAMMA: f32 = 1.2;
85
86    let e = e.max(0.0);
87    // Step 1: inverse OETF — encoded HLG value → scene-linear (0..1
88    // where 1.0 is the HLG peak, typically interpreted as 1000 nits
89    // on a reference display).
90    let scene_lin = if e <= 0.5 {
91        (e * e) / 3.0
92    } else {
93        ((((e - C) / A).exp()) + B) / 12.0
94    };
95    // Step 2: OOTF — scene-linear → display-linear with γ=1.2 for
96    // SDR target. Per-channel approximation. Naturally compresses
97    // the iPhone "1-stop bright" overshoot since values >1 raised
98    // to 1.2 expand and then get clipped by Hable's max_white.
99    let display_lin = scene_lin.powf(HLG_OOTF_GAMMA);
100    // Step 3: rescale to the 1.0=100-nit-SDR-white frame the tonemap
101    // expects. HLG peak (1.0 → after OOTF still 1.0) maps to 10.0
102    // here, same as PQ's 1000-nit reference.
103    display_lin * 10.0
104}
105
106#[inline(always)]
107fn dispatch_eotf(transfer: TransferFn, encoded: f32) -> f32 {
108    match transfer {
109        TransferFn::St2084 => pq_to_linear(encoded),
110        TransferFn::AribStdB67 => hlg_to_linear(encoded),
111        // Defensive: a non-HDR transfer reaching this path is a caller
112        // bug — we've gated dispatch on `is_hdr` upstream. Treat as
113        // identity rather than panicking so partial bugs don't take
114        // out playback.
115        _ => encoded.max(0.0),
116    }
117}
118
119// ── tonemap (Hable filmic) ────────────────────────────────────────────
120
121/// Uncharted 2 partial — the building block of Hable's filmic curve.
122/// Numbers are Hable's published coefficients verbatim.
123#[inline(always)]
124fn hable_partial(x: f32) -> f32 {
125    const A: f32 = 0.15;
126    const B: f32 = 0.50;
127    const C: f32 = 0.10;
128    const D: f32 = 0.20;
129    const E: f32 = 0.02;
130    const F: f32 = 0.30;
131    ((x * (A * x + C * B) + D * E) / (x * (A * x + B) + D * F)) - E / F
132}
133
134/// Hable filmic tonemap. Input is scene-linear (1.0 = SDR diffuse
135/// white reference). `max_white` is the scene-linear value that should
136/// map to display white (1.0 SDR-linear out) — typically the source's
137/// MaxCLL or the master display max luminance, divided by 100.
138#[inline(always)]
139fn hable_tonemap(x: f32, max_white: f32) -> f32 {
140    // 2.0 exposure bias (Hable's recommended default — gives the toe
141    // a film-stock feel and lifts midtones slightly).
142    const EXPOSURE: f32 = 2.0;
143    let curr = hable_partial(x * EXPOSURE);
144    let scale = 1.0 / hable_partial(max_white * EXPOSURE);
145    (curr * scale).clamp(0.0, 1.0)
146}
147
148// ── BT.709 OETF (linear → gamma-encoded) ──────────────────────────────
149
150#[inline(always)]
151fn bt709_oetf(l: f32) -> f32 {
152    let l = l.clamp(0.0, 1.0);
153    if l < 0.018 {
154        4.5 * l
155    } else {
156        1.099 * l.powf(0.45) - 0.099
157    }
158}
159
160// ── matrix coefficients ───────────────────────────────────────────────
161
162/// BT.2020 NCL Y'CbCr → R'G'B' (still in encoded transfer function
163/// domain). Cb / Cr inputs are normalised to [-0.5, 0.5].
164#[inline(always)]
165fn yuv2020ncl_to_rgb(y: f32, cb: f32, cr: f32) -> (f32, f32, f32) {
166    // Kr = 0.2627, Kb = 0.0593, Kg = 1 - Kr - Kb = 0.6780.
167    let r = y + 1.4746 * cr;
168    let g = y - 0.16455 * cb - 0.57135 * cr;
169    let b = y + 1.8814 * cb;
170    (r, g, b)
171}
172
173/// Linear RGB BT.2020 → Linear RGB BT.709 (D65 white-point matched).
174/// Negative coefficients are intentional — gamut conversion can produce
175/// out-of-gamut values which we clip on the OETF input side.
176#[inline(always)]
177fn rgb2020_to_rgb709_linear(r: f32, g: f32, b: f32) -> (f32, f32, f32) {
178    let r_out = 1.66049 * r - 0.58764 * g - 0.07285 * b;
179    let g_out = -0.12455 * r + 1.13290 * g - 0.01006 * b;
180    let b_out = -0.01815 * r - 0.10058 * g + 1.11873 * b;
181    (r_out, g_out, b_out)
182}
183
184/// R'G'B' BT.709 (gamma) → Y'CbCr 8-bit limited range.
185/// Output triplet is (y, cb, cr) ∈ [16..235], [16..240], [16..240].
186#[inline(always)]
187fn rgb709_to_yuv709_limited(r: f32, g: f32, b: f32) -> (u8, u8, u8) {
188    // Kr = 0.2126, Kb = 0.0722.
189    let y = 0.2126 * r + 0.7152 * g + 0.0722 * b;
190    let cb = (b - y) / 1.8556;
191    let cr = (r - y) / 1.5748;
192    let y8 = (y * 219.0 + 16.0).round().clamp(16.0, 235.0) as u8;
193    let cb8 = (cb * 224.0 + 128.0).round().clamp(16.0, 240.0) as u8;
194    let cr8 = (cr * 224.0 + 128.0).round().clamp(16.0, 240.0) as u8;
195    (y8, cb8, cr8)
196}
197
198// ── chroma desub: 10-bit Y'CbCr code → normalised float ───────────────
199
200const Y_BLACK_10: f32 = 64.0; // 16 << 2
201const Y_RANGE_10: f32 = 876.0; // (235 - 16) << 2
202const C_NEUTRAL_10: f32 = 512.0; // 128 << 2
203const C_HALFRANGE_10: f32 = 448.0; // 224/2 << 2
204
205#[inline(always)]
206fn y10_to_normalised(y: u16) -> f32 {
207    (y as f32 - Y_BLACK_10) / Y_RANGE_10
208}
209
210#[inline(always)]
211fn c10_to_normalised(c: u16) -> f32 {
212    (c as f32 - C_NEUTRAL_10) / (C_HALFRANGE_10 * 2.0)
213}
214
215// ── public entry ──────────────────────────────────────────────────────
216
217/// Default scene-linear white point when the source carries no
218/// mastering display metadata. Picked to match a typical HDR10 master
219/// at 1000-nit peak — most consumer HDR content. Sources tagged with
220/// `mastering_display.max_luminance` use that exact value instead.
221const DEFAULT_MAX_WHITE_NITS: f32 = 1000.0;
222
223/// HDR → SDR tonemap.
224///
225/// Input must be `Yuv420p10le` (BT.2020 NCL is assumed; CL would need
226/// a different matrix). Output is `Yuv420p` (8-bit, BT.709 limited).
227///
228/// `transfer` selects the EOTF (PQ vs HLG). `max_white_nits` is the
229/// scene-linear white point used to scale the Hable curve — pass the
230/// source's mastering-display `max_luminance` (in cd/m²) when present;
231/// otherwise `DEFAULT_MAX_WHITE_NITS`.
232///
233/// Implementation: per-pixel Y conversion at full resolution; chroma
234/// downsampled by averaging the 2×2 luma-area RGB output back into a
235/// single (cb, cr) per chroma sample. This is more expensive than a
236/// "tonemap once per chroma block" approach but avoids the hue shifts
237/// that can show up at high luminance on subsampled-tonemap output.
238pub fn tonemap_yuv420p10le_bt2020_to_yuv420p_bt709(
239    src: &VideoFrame,
240    transfer: TransferFn,
241    max_white_nits: Option<f32>,
242) -> Result<VideoFrame> {
243    if !matches!(src.format, PixelFormat::Yuv420p10le) {
244        bail!(
245            "tonemap_yuv420p10le_bt2020_to_yuv420p_bt709 expects Yuv420p10le; got {:?}",
246            src.format
247        );
248    }
249    let w = src.width as usize;
250    let h = src.height as usize;
251    if w == 0 || h == 0 || (w & 1) != 0 || (h & 1) != 0 {
252        bail!("tonemap requires non-zero even dimensions; got {}x{}", w, h);
253    }
254
255    let max_white = (max_white_nits.unwrap_or(DEFAULT_MAX_WHITE_NITS) / 100.0).max(1.0);
256
257    let y_plane_bytes = w * h * 2;
258    let c_plane_bytes = (w / 2) * (h / 2) * 2;
259    if src.data.len() < y_plane_bytes + 2 * c_plane_bytes {
260        bail!(
261            "Yuv420p10le frame too small for {}x{}: need {} bytes, got {}",
262            w,
263            h,
264            y_plane_bytes + 2 * c_plane_bytes,
265            src.data.len()
266        );
267    }
268
269    // Reinterpret the byte slice as u16 LE planes. Endianness assumed
270    // little — every host we ship to is x86_64 / aarch64 LE; a future
271    // BE platform would need byteswap helpers here.
272    let bytes = src.data.as_ref();
273    let y_plane: &[u16] =
274        unsafe { std::slice::from_raw_parts(bytes.as_ptr() as *const u16, w * h) };
275    let cb_plane: &[u16] = unsafe {
276        std::slice::from_raw_parts(
277            bytes.as_ptr().add(y_plane_bytes) as *const u16,
278            (w / 2) * (h / 2),
279        )
280    };
281    let cr_plane: &[u16] = unsafe {
282        std::slice::from_raw_parts(
283            bytes.as_ptr().add(y_plane_bytes + c_plane_bytes) as *const u16,
284            (w / 2) * (h / 2),
285        )
286    };
287
288    let mut out_y = vec![0u8; w * h];
289    let mut out_cb = vec![0u8; (w / 2) * (h / 2)];
290    let mut out_cr = vec![0u8; (w / 2) * (h / 2)];
291
292    // Walk in 2x2 blocks so we can downsample the chroma in lockstep.
293    for by in 0..(h / 2) {
294        for bx in 0..(w / 2) {
295            let cb_n = c10_to_normalised(cb_plane[by * (w / 2) + bx]);
296            let cr_n = c10_to_normalised(cr_plane[by * (w / 2) + bx]);
297
298            let mut acc_cb = 0.0_f32;
299            let mut acc_cr = 0.0_f32;
300
301            for dy in 0..2 {
302                for dx in 0..2 {
303                    let yi = by * 2 + dy;
304                    let xi = bx * 2 + dx;
305                    let y_n = y10_to_normalised(y_plane[yi * w + xi]);
306
307                    // 1. BT.2020 NCL Y'CbCr → R'G'B' (still gamma).
308                    let (r_g, g_g, b_g) = yuv2020ncl_to_rgb(y_n, cb_n, cr_n);
309
310                    // 2. EOTF inverse: gamma → scene linear (1.0 = SDR diffuse).
311                    let r_lin = dispatch_eotf(transfer, r_g);
312                    let g_lin = dispatch_eotf(transfer, g_g);
313                    let b_lin = dispatch_eotf(transfer, b_g);
314
315                    // 3. Gamut convert: linear BT.2020 → linear BT.709.
316                    let (r709, g709, b709) = rgb2020_to_rgb709_linear(r_lin, g_lin, b_lin);
317
318                    // 4. Tonemap each channel (per-channel preserves
319                    //    saturation better than luminance-only at the
320                    //    cost of slightly less perceptually uniform
321                    //    response — Hable's published recipe uses
322                    //    per-channel).
323                    let r_tm = hable_tonemap(r709, max_white);
324                    let g_tm = hable_tonemap(g709, max_white);
325                    let b_tm = hable_tonemap(b709, max_white);
326
327                    // 5. OETF: linear → BT.709 gamma encoded.
328                    let r_o = bt709_oetf(r_tm);
329                    let g_o = bt709_oetf(g_tm);
330                    let b_o = bt709_oetf(b_tm);
331
332                    // 6. RGB → Y'CbCr 8-bit BT.709 limited.
333                    let (y8, cb8, cr8) = rgb709_to_yuv709_limited(r_o, g_o, b_o);
334                    out_y[yi * w + xi] = y8;
335                    acc_cb += cb8 as f32;
336                    acc_cr += cr8 as f32;
337                }
338            }
339
340            // Downsample chroma: average the 4 per-pixel chroma values
341            // back to one sample per 2x2 block (4:2:0 layout).
342            out_cb[by * (w / 2) + bx] = (acc_cb * 0.25).round() as u8;
343            out_cr[by * (w / 2) + bx] = (acc_cr * 0.25).round() as u8;
344        }
345    }
346
347    let mut out = Vec::with_capacity(w * h + 2 * (w / 2) * (h / 2));
348    out.extend_from_slice(&out_y);
349    out.extend_from_slice(&out_cb);
350    out.extend_from_slice(&out_cr);
351
352    Ok(VideoFrame::new(
353        Bytes::from(out),
354        src.width,
355        src.height,
356        PixelFormat::Yuv420p,
357        ColorSpace::Bt709,
358        src.pts,
359    ))
360}
361
362#[cfg(test)]
363mod tests {
364    use super::*;
365
366    fn make_solid_yuv420p10le(w: u32, h: u32, y10: u16, cb10: u16, cr10: u16) -> VideoFrame {
367        let mut bytes = Vec::with_capacity((w * h * 2 + 2 * (w / 2) * (h / 2) * 2) as usize);
368        for _ in 0..(w * h) {
369            bytes.extend_from_slice(&y10.to_le_bytes());
370        }
371        for _ in 0..((w / 2) * (h / 2)) {
372            bytes.extend_from_slice(&cb10.to_le_bytes());
373        }
374        for _ in 0..((w / 2) * (h / 2)) {
375            bytes.extend_from_slice(&cr10.to_le_bytes());
376        }
377        VideoFrame::new(
378            Bytes::from(bytes),
379            w,
380            h,
381            PixelFormat::Yuv420p10le,
382            ColorSpace::Bt2020,
383            0,
384        )
385    }
386
387    #[test]
388    fn tonemap_solid_pq_black_yields_sdr_black() {
389        // 10-bit limited-range black: Y=64, Cb=Cr=512.
390        let src = make_solid_yuv420p10le(16, 16, 64, 512, 512);
391        let out = tonemap_yuv420p10le_bt2020_to_yuv420p_bt709(&src, TransferFn::St2084, None)
392            .expect("tonemap");
393        assert_eq!(out.format, PixelFormat::Yuv420p);
394        assert_eq!(out.color_space, ColorSpace::Bt709);
395        let y = out.data[0];
396        let cb = out.data[16 * 16];
397        let cr = out.data[16 * 16 + 8 * 8];
398        // Black should map to BT.709 limited black: Y≈16, Cb≈Cr≈128.
399        assert!((y as i32 - 16).abs() <= 1, "Y near 16, got {}", y);
400        assert!((cb as i32 - 128).abs() <= 1, "Cb near 128, got {}", cb);
401        assert!((cr as i32 - 128).abs() <= 1, "Cr near 128, got {}", cr);
402    }
403
404    #[test]
405    fn tonemap_solid_pq_white_clipped_under_one() {
406        // 10-bit PQ peak: Y=940 (limited-range white).
407        let src = make_solid_yuv420p10le(16, 16, 940, 512, 512);
408        let out =
409            tonemap_yuv420p10le_bt2020_to_yuv420p_bt709(&src, TransferFn::St2084, Some(1000.0))
410                .expect("tonemap");
411        let y = out.data[0];
412        // PQ "white" code corresponds to 10,000 nits absolute. At
413        // max_white=1000 nits, that's 10x overrange — Hable curve
414        // saturates near 1.0, OETF gives ~235 limited-range. Allow
415        // a small numerical margin.
416        assert!(y >= 200, "PQ peak should map near limited-white; got {}", y);
417        assert!(y <= 235, "limited-range upper bound 235, got {}", y);
418    }
419
420    #[test]
421    fn tonemap_solid_pq_midgrey_yields_lifted_midgrey() {
422        // PQ encoded ~50% (midpoint code 0.5 → ~92 nits → ~1.0 in
423        // SDR-linear-1.0=100-nits frame). Hable with exposure=2 lifts
424        // this above linear 0.5 → BT.709 OETF gives a code well above
425        // the limited-range mid (Y≈126).
426        let y10 = ((0.5 * Y_RANGE_10) + Y_BLACK_10) as u16;
427        let src = make_solid_yuv420p10le(16, 16, y10, 512, 512);
428        let out = tonemap_yuv420p10le_bt2020_to_yuv420p_bt709(&src, TransferFn::St2084, None)
429            .expect("tonemap");
430        let y = out.data[0];
431        assert!(
432            (130..=210).contains(&y),
433            "PQ ~92 nits should land in upper-mid limited range, got {}",
434            y
435        );
436    }
437
438    #[test]
439    fn tonemap_hlg_path_runs() {
440        // Smoke: HLG black should map near limited-range black.
441        let src = make_solid_yuv420p10le(8, 8, 64, 512, 512);
442        let out = tonemap_yuv420p10le_bt2020_to_yuv420p_bt709(&src, TransferFn::AribStdB67, None)
443            .expect("tonemap HLG");
444        assert!((out.data[0] as i32 - 16).abs() <= 1);
445    }
446
447    #[test]
448    fn tonemap_rejects_wrong_format() {
449        let src = VideoFrame::new(
450            Bytes::from(vec![0u8; 96]),
451            8,
452            8,
453            PixelFormat::Yuv420p,
454            ColorSpace::Bt709,
455            0,
456        );
457        let err = tonemap_yuv420p10le_bt2020_to_yuv420p_bt709(&src, TransferFn::St2084, None)
458            .expect_err("must reject 8-bit input");
459        assert!(format!("{:?}", err).contains("Yuv420p10le"));
460    }
461
462    #[test]
463    fn pq_eotf_monotonic() {
464        // Sanity: EOTF must be monotonically increasing.
465        let mut last = -1.0;
466        for i in 0..=100 {
467            let v = pq_to_linear(i as f32 / 100.0);
468            assert!(v >= last, "non-monotonic at {}: {} < {}", i, v, last);
469            last = v;
470        }
471    }
472
473    #[test]
474    fn hable_tonemap_clamps_to_unit() {
475        // Inputs above max_white should clamp to <= 1.0.
476        for x in [0.0, 1.0, 5.0, 50.0, 500.0_f32] {
477            let v = hable_tonemap(x, 10.0);
478            assert!(v >= 0.0 && v <= 1.0, "out of range at x={}: {}", x, v);
479        }
480    }
481
482    #[test]
483    fn bt709_oetf_inverts_neutral_grey() {
484        // Reference values from ITU-R BT.709 §1.2:
485        //   E' = 4.5 * E                       for 0 ≤ E < 0.018
486        //   E' = 1.099 * E^0.45 - 0.099        for 0.018 ≤ E ≤ 1
487        // At E = 0.5: 1.099 * 0.5^0.45 - 0.099 ≈ 0.7055.
488        // At E = 1.0: 1.099 * 1.0 - 0.099 = 1.000.
489        // (Earlier this test asserted 0.7398, which is the sRGB EOTF
490        // value — different transfer function, different formula. The
491        // BT.709 number is materially lower at mid-grey.)
492        assert!((bt709_oetf(0.5) - 0.7055).abs() < 0.01);
493        assert!((bt709_oetf(1.0) - 1.0).abs() < 0.01);
494    }
495}