1use fearless_simd::{Level, dispatch, f32x4, prelude::*};
20
21#[cfg(target_arch = "aarch64")]
22use core::arch::aarch64::{float32x4_t, vcopyq_laneq_f32};
23#[cfg(target_arch = "x86")]
24use core::arch::x86::{__m128, _mm_blend_ps};
25#[cfg(target_arch = "x86_64")]
26use core::arch::x86_64::{__m128, _mm_blend_ps};
27
28fearless_simd::kernel!(
29 #[inline]
31 fn copy_alpha_neon(neon: Neon, a: float32x4_t, b: float32x4_t) -> float32x4_t {
32 vcopyq_laneq_f32::<3, 3>(a, b)
33 }
34);
35
36fearless_simd::kernel!(
37 #[inline]
39 fn copy_alpha_sse4_2(sse4_2: Sse4_2, a: __m128, b: __m128) -> __m128 {
40 _mm_blend_ps::<8>(a, b)
41 }
42);
43
44#[inline(always)]
49fn copy_alpha<S: Simd>(a: f32x4<S>, b: f32x4<S>) -> f32x4<S> {
50 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
51 if let Some(sse4_2) = a.simd.level().as_sse4_2() {
52 return copy_alpha_sse4_2(sse4_2, a.into(), b.into()).simd_into(a.simd);
53 }
54
55 #[cfg(target_arch = "aarch64")]
56 if let Some(neon) = a.simd.level().as_neon() {
57 return copy_alpha_neon(neon, a.into(), b.into()).simd_into(a.simd);
58 }
59
60 let mut result = a;
61 result[3] = b[3];
62 result
63}
64
65#[inline(always)]
67fn to_srgb<S: Simd>(simd: S, rgba: [f32; 4]) -> [f32; 4] {
68 let v: f32x4<S> = rgba.simd_into(simd);
69 let vabs = v.abs();
70 let x = vabs - 5.358_626_4e-4;
71 let x2 = x * x;
72 let even1 = x * -9.127_959e-1 + -2.881_431_4e-2;
73 let even2 = x2 * -7.291_929e-1 + even1;
74 let odd1 = x * 1.061_331_7 + 1.401_945_4;
75 let odd2 = x2 * 2.077_583e-1 + odd1;
76 let poly = odd2 * x.sqrt() + even2;
77 let lin = vabs * 12.92;
78 let z = vabs.simd_gt(0.0031308).select(poly, lin);
79 let z_signed = z.copysign(v);
80 let result = copy_alpha(z_signed, v);
81 result.into()
82}
83
84fn main() {
85 let level = Level::new();
86 let rgba = [0.1, -0.2, 0.001, 0.4];
87 let srgb = dispatch!(level, simd=> to_srgb(simd, rgba));
88 println!("{srgb:?}");
89}