Skip to main content

srgb/
srgb.rs

1// Copyright 2024 the Fearless_SIMD Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! Converts a single RGBA pixel from linear RGB to sRGB.
5//!
6//! This example demonstrates the usual Fearless SIMD structure:
7//!
8//! - write the main computation as an `#[inline(always)]` function generic over
9//!   [`Simd`];
10//! - use [`dispatch!`] at the non-SIMD boundary to run it with the best
11//!   available target features;
12//! - drop down to [`kernel!`](fearless_simd::kernel) when a small part of the
13//!   computation needs a target-specific intrinsic.
14//!
15//! The RGB channels are converted with portable SIMD operations. The alpha
16//! channel is copied unchanged, using an architecture-specific lane-copy
17//! intrinsic if one is available and a scalar fallback otherwise.
18
19use fearless_simd::{Level, dispatch, f32x4, prelude::*};
20
21#[cfg(target_arch = "aarch64")]
22use core::arch::aarch64::{float32x4_t, vcopyq_laneq_f32};
23#[cfg(target_arch = "x86")]
24use core::arch::x86::{__m128, _mm_blend_ps};
25#[cfg(target_arch = "x86_64")]
26use core::arch::x86_64::{__m128, _mm_blend_ps};
27
28fearless_simd::kernel!(
29    /// Copy the alpha lane on AArch64 using a NEON lane-copy intrinsic.
30    #[inline]
31    fn copy_alpha_neon(neon: Neon, a: float32x4_t, b: float32x4_t) -> float32x4_t {
32        vcopyq_laneq_f32::<3, 3>(a, b)
33    }
34);
35
36fearless_simd::kernel!(
37    /// Copy the alpha lane on x86 using the SSE4.2 token to enable SSE4.1 blend instructions.
38    #[inline]
39    fn copy_alpha_sse4_2(sse4_2: Sse4_2, a: __m128, b: __m128) -> __m128 {
40        _mm_blend_ps::<8>(a, b)
41    }
42);
43
44/// Return `a` with its alpha channel replaced by `b`'s alpha channel.
45///
46/// This helper shows how portable SIMD code can opportunistically call
47/// target-specific kernels while still providing a fallback for every backend.
48#[inline(always)]
49fn copy_alpha<S: Simd>(a: f32x4<S>, b: f32x4<S>) -> f32x4<S> {
50    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
51    if let Some(sse4_2) = a.simd.level().as_sse4_2() {
52        return copy_alpha_sse4_2(sse4_2, a.into(), b.into()).simd_into(a.simd);
53    }
54
55    #[cfg(target_arch = "aarch64")]
56    if let Some(neon) = a.simd.level().as_neon() {
57        return copy_alpha_neon(neon, a.into(), b.into()).simd_into(a.simd);
58    }
59
60    let mut result = a;
61    result[3] = b[3];
62    result
63}
64
65/// Approximate the linear-RGB to sRGB transfer curve for RGB, preserving alpha.
66#[inline(always)]
67fn to_srgb<S: Simd>(simd: S, rgba: [f32; 4]) -> [f32; 4] {
68    let v: f32x4<S> = rgba.simd_into(simd);
69    let vabs = v.abs();
70    let x = vabs - 5.358_626_4e-4;
71    let x2 = x * x;
72    let even1 = x * -9.127_959e-1 + -2.881_431_4e-2;
73    let even2 = x2 * -7.291_929e-1 + even1;
74    let odd1 = x * 1.061_331_7 + 1.401_945_4;
75    let odd2 = x2 * 2.077_583e-1 + odd1;
76    let poly = odd2 * x.sqrt() + even2;
77    let lin = vabs * 12.92;
78    let z = vabs.simd_gt(0.0031308).select(poly, lin);
79    let z_signed = z.copysign(v);
80    let result = copy_alpha(z_signed, v);
81    result.into()
82}
83
84fn main() {
85    let level = Level::new();
86    let rgba = [0.1, -0.2, 0.001, 0.4];
87    let srgb = dispatch!(level, simd=> to_srgb(simd, rgba));
88    println!("{srgb:?}");
89}