use archmage::rite;
#[cfg(target_arch = "x86_64")]
pub use archmage::X64V3Token;
#[cfg(target_arch = "aarch64")]
pub use archmage::NeonToken;
#[cfg(target_arch = "wasm32")]
pub use archmage::Wasm128Token;
use magetypes::simd::backends::{F32x4Backend, F32x4Convert};
use magetypes::simd::generic::f32x4 as gen_f32x4;
// Encoded-domain cutoff between the linear segment and the power-curve branch
// of the sRGB transfer function. NOTE(review): the canonical IEC 61966-2-1
// value is 0.04045; this constant presumably matches the exact crossover of
// the rational-polynomial approximation in `crate::rational_poly` — confirm.
const SRGB_LINEAR_THRESHOLD: f32 = 0.039_293_37;
// Linear-domain cutoff for the inverse (linear -> sRGB) direction.
// NOTE(review): canonical spec value is 0.0031308; see note above.
const LINEAR_THRESHOLD: f32 = 0.003_041_282_6;
// Slope of the linear (near-black) segment: linear = encoded / 12.92.
const LINEAR_SCALE: f32 = 1.0 / 12.92;
// Inverse slope: encoded = linear * 12.92 on the linear segment.
const TWELVE_92: f32 = 12.92;
/// Decodes four sRGB-encoded values (clamped to `[0, 1]`) to linear light
/// using a degree-4 rational-polynomial approximation of the power branch
/// and the exact `x / 12.92` linear branch below the threshold.
///
/// Inputs at or above 1.0 are pinned to exactly 1.0 in the output.
#[inline(always)]
fn srgb_to_linear_core<T: F32x4Backend>(token: T, srgb: [f32; 4]) -> [f32; 4] {
    use crate::rational_poly::{S2L_P, S2L_Q};
    let zero = gen_f32x4::<T>::zero(token);
    let one = gen_f32x4::<T>::splat(token, 1.0);
    let input = gen_f32x4::<T>::from_array(token, srgb);
    // Work on the value clamped to [0, 1]; >= 1 lanes are pinned at the end.
    let x = input.max(zero).min(one);
    // Degree-4 Horner evaluation at `x` of a 5-coefficient set
    // (same FMA chain as the hand-unrolled form: c4*x + c3, then *x + c2, ...).
    let horner = |c: &[f32]| {
        let mut acc = gen_f32x4::<T>::splat(token, c[4]);
        for i in (0..4).rev() {
            acc = acc.mul_add(x, gen_f32x4::<T>::splat(token, c[i]));
        }
        acc
    };
    // Power-curve branch: P(x) / Q(x), capped at 1.0.
    let curve = (horner(&S2L_P) / horner(&S2L_Q)).min(one);
    // Linear branch used for small (near-black) encoded values.
    let straight = x * gen_f32x4::<T>::splat(token, LINEAR_SCALE);
    let small = x.simd_lt(gen_f32x4::<T>::splat(token, SRGB_LINEAR_THRESHOLD));
    let mixed = gen_f32x4::<T>::blend(small, straight, curve);
    // Force lanes whose raw input was >= 1.0 to exactly 1.0.
    gen_f32x4::<T>::blend(input.simd_ge(one), one, mixed).to_array()
}
/// Encodes four linear-light values (clamped to `[0, 1]`) to sRGB using a
/// degree-4 rational-polynomial approximation evaluated in sqrt-space, and
/// the exact `12.92 * x` linear branch below the threshold.
///
/// Inputs at or above 1.0 are pinned to exactly 1.0 in the output.
#[inline(always)]
fn linear_to_srgb_core<T: F32x4Backend>(token: T, linear: [f32; 4]) -> [f32; 4] {
    use crate::rational_poly::{L2S_P, L2S_Q};
    let zero = gen_f32x4::<T>::zero(token);
    let one = gen_f32x4::<T>::splat(token, 1.0);
    let input = gen_f32x4::<T>::from_array(token, linear);
    let clamped = input.max(zero).min(one);
    // The rational approximation is evaluated in sqrt-space.
    let s = clamped.sqrt();
    // Degree-4 Horner evaluation at `s` (identical FMA chain to the
    // hand-unrolled original).
    let horner = |c: &[f32]| {
        let mut acc = gen_f32x4::<T>::splat(token, c[4]);
        for i in (0..4).rev() {
            acc = acc.mul_add(s, gen_f32x4::<T>::splat(token, c[i]));
        }
        acc
    };
    let curve = (horner(&L2S_P) / horner(&L2S_Q)).min(one);
    // Linear branch: encoded = 12.92 * linear near black.
    let straight = clamped * gen_f32x4::<T>::splat(token, TWELVE_92);
    let small = clamped.simd_lt(gen_f32x4::<T>::splat(token, LINEAR_THRESHOLD));
    let mixed = gen_f32x4::<T>::blend(small, straight, curve);
    // Force lanes whose raw input was >= 1.0 to exactly 1.0.
    gen_f32x4::<T>::blend(input.simd_ge(one), one, mixed).to_array()
}
/// Decodes four gamma-encoded values to linear via a pure power law:
/// `linear = clamp(encoded, 0, 1) ^ gamma`.
#[inline(always)]
fn gamma_to_linear_core<T: F32x4Convert>(token: T, encoded: [f32; 4], gamma: f32) -> [f32; 4] {
    let lo = gen_f32x4::<T>::zero(token);
    let hi = gen_f32x4::<T>::splat(token, 1.0);
    gen_f32x4::<T>::from_array(token, encoded)
        .max(lo)
        .min(hi)
        .pow_midp(gamma)
        .to_array()
}
/// Encodes four linear values with the inverse power law:
/// `encoded = clamp(linear, 0, 1) ^ (1 / gamma)`.
/// NOTE(review): `gamma == 0.0` produces an infinite exponent — callers are
/// presumed to pass `gamma > 0`; confirm at the call sites.
#[inline(always)]
fn linear_to_gamma_core<T: F32x4Convert>(token: T, linear: [f32; 4], gamma: f32) -> [f32; 4] {
    let lo = gen_f32x4::<T>::zero(token);
    let hi = gen_f32x4::<T>::splat(token, 1.0);
    gen_f32x4::<T>::from_array(token, linear)
        .max(lo)
        .min(hi)
        .pow_midp(1.0 / gamma)
        .to_array()
}
/// x86-64-v3 entry point for [`srgb_to_linear_core`]; `token` is the
/// archmage proof that the required target features are available.
#[cfg(target_arch = "x86_64")]
#[rite]
pub fn srgb_to_linear_v3(token: X64V3Token, srgb: [f32; 4]) -> [f32; 4] {
    srgb_to_linear_core(token, srgb)
}
/// x86-64-v3 entry point for [`linear_to_srgb_core`].
#[cfg(target_arch = "x86_64")]
#[rite]
pub fn linear_to_srgb_v3(token: X64V3Token, linear: [f32; 4]) -> [f32; 4] {
    linear_to_srgb_core(token, linear)
}
/// Extended-range sRGB decode: unlike [`srgb_to_linear_core`], inputs are NOT
/// clamped to `[0, 1]`. Negative lanes are decoded by mirroring the curve
/// around zero (sign-preserving transfer), and lanes above 1.0 follow the
/// polynomial extrapolation. Uses a degree-6 rational approximation.
/// NOTE(review): accuracy outside [0, 1] depends on the EXT_* coefficient
/// fit range — confirm against `crate::rational_poly`.
#[archmage::magetypes(v3, neon, wasm128)]
#[rite]
pub fn srgb_to_linear_extended(token: Token, srgb: [f32; 4]) -> [f32; 4] {
    use crate::rational_poly::{EXT_S2L_P as P, EXT_S2L_Q as Q};
    #[allow(non_camel_case_types)]
    type f32x4 = gen_f32x4<Token>;
    let zero = f32x4::zero(token);
    let v = f32x4::from_array(token, srgb);
    // Remember which lanes were negative, then work on magnitudes.
    let neg_mask = v.simd_lt(zero);
    let abs_v = v.abs();
    // Linear segment for small magnitudes.
    let linear_result = abs_v * f32x4::splat(token, LINEAR_SCALE);
    let x = abs_v;
    // Degree-6 Horner evaluation of P(x).
    let yp = f32x4::splat(token, P[6]).mul_add(x, f32x4::splat(token, P[5]));
    let yp = yp.mul_add(x, f32x4::splat(token, P[4]));
    let yp = yp.mul_add(x, f32x4::splat(token, P[3]));
    let yp = yp.mul_add(x, f32x4::splat(token, P[2]));
    let yp = yp.mul_add(x, f32x4::splat(token, P[1]));
    let yp = yp.mul_add(x, f32x4::splat(token, P[0]));
    // Degree-6 Horner evaluation of Q(x).
    let yq = f32x4::splat(token, Q[6]).mul_add(x, f32x4::splat(token, Q[5]));
    let yq = yq.mul_add(x, f32x4::splat(token, Q[4]));
    let yq = yq.mul_add(x, f32x4::splat(token, Q[3]));
    let yq = yq.mul_add(x, f32x4::splat(token, Q[2]));
    let yq = yq.mul_add(x, f32x4::splat(token, Q[1]));
    let yq = yq.mul_add(x, f32x4::splat(token, Q[0]));
    let power_result = yp / yq;
    let thresh_mask = abs_v.simd_lt(f32x4::splat(token, SRGB_LINEAR_THRESHOLD));
    let result = f32x4::blend(thresh_mask, linear_result, power_result);
    // Restore the sign of originally-negative lanes.
    f32x4::blend(neg_mask, -result, result).to_array()
}
/// Extended-range sRGB encode: no `[0, 1]` clamp. Negative lanes are encoded
/// by mirroring the curve around zero (sign-preserving), matching
/// [`srgb_to_linear_extended`]. The degree-6 rational approximation is
/// evaluated in sqrt-space of the magnitude.
#[archmage::magetypes(v3, neon, wasm128)]
#[rite]
pub fn linear_to_srgb_extended(token: Token, linear: [f32; 4]) -> [f32; 4] {
    use crate::rational_poly::{EXT_L2S_P as P, EXT_L2S_Q as Q};
    #[allow(non_camel_case_types)]
    type f32x4 = gen_f32x4<Token>;
    let zero = f32x4::zero(token);
    let v = f32x4::from_array(token, linear);
    // Remember which lanes were negative, then work on magnitudes.
    let neg_mask = v.simd_lt(zero);
    let abs_v = v.abs();
    // Linear segment for small magnitudes.
    let linear_result = abs_v * f32x4::splat(token, TWELVE_92);
    // Polynomials are evaluated at sqrt(|v|).
    let x = abs_v.sqrt();
    // Degree-6 Horner evaluation of P(x).
    let yp = f32x4::splat(token, P[6]).mul_add(x, f32x4::splat(token, P[5]));
    let yp = yp.mul_add(x, f32x4::splat(token, P[4]));
    let yp = yp.mul_add(x, f32x4::splat(token, P[3]));
    let yp = yp.mul_add(x, f32x4::splat(token, P[2]));
    let yp = yp.mul_add(x, f32x4::splat(token, P[1]));
    let yp = yp.mul_add(x, f32x4::splat(token, P[0]));
    // Degree-6 Horner evaluation of Q(x).
    let yq = f32x4::splat(token, Q[6]).mul_add(x, f32x4::splat(token, Q[5]));
    let yq = yq.mul_add(x, f32x4::splat(token, Q[4]));
    let yq = yq.mul_add(x, f32x4::splat(token, Q[3]));
    let yq = yq.mul_add(x, f32x4::splat(token, Q[2]));
    let yq = yq.mul_add(x, f32x4::splat(token, Q[1]));
    let yq = yq.mul_add(x, f32x4::splat(token, Q[0]));
    let power_result = yp / yq;
    let thresh_mask = abs_v.simd_lt(f32x4::splat(token, LINEAR_THRESHOLD));
    let result = f32x4::blend(thresh_mask, linear_result, power_result);
    // Restore the sign of originally-negative lanes.
    f32x4::blend(neg_mask, -result, result).to_array()
}
/// x86-64-v3 entry point for [`gamma_to_linear_core`] (pure power-law decode).
#[cfg(target_arch = "x86_64")]
#[rite]
pub fn gamma_to_linear_v3(token: X64V3Token, encoded: [f32; 4], gamma: f32) -> [f32; 4] {
    gamma_to_linear_core(token, encoded, gamma)
}
/// x86-64-v3 entry point for [`linear_to_gamma_core`] (pure power-law encode).
#[cfg(target_arch = "x86_64")]
#[rite]
pub fn linear_to_gamma_v3(token: X64V3Token, linear: [f32; 4], gamma: f32) -> [f32; 4] {
    linear_to_gamma_core(token, linear, gamma)
}
/// In-place sRGB -> linear over a slice: SIMD on 4-wide chunks, scalar
/// fallback on the (< 4 element) remainder.
#[cfg(target_arch = "x86_64")]
#[rite]
pub fn srgb_to_linear_slice_v3(token: X64V3Token, values: &mut [f32]) {
    let (chunks, remainder) = values.as_chunks_mut::<4>();
    for chunk in chunks {
        *chunk = srgb_to_linear_core(token, *chunk);
    }
    // Tail elements go through the scalar reference implementation.
    for v in remainder {
        *v = crate::scalar::srgb_to_linear(*v);
    }
}
/// In-place linear -> sRGB over a slice: SIMD on 4-wide chunks, scalar
/// fallback on the remainder.
#[cfg(target_arch = "x86_64")]
#[rite]
pub fn linear_to_srgb_slice_v3(token: X64V3Token, values: &mut [f32]) {
    let (chunks, remainder) = values.as_chunks_mut::<4>();
    for chunk in chunks {
        *chunk = linear_to_srgb_core(token, *chunk);
    }
    // Tail elements go through the scalar reference implementation.
    for v in remainder {
        *v = crate::scalar::linear_to_srgb(*v);
    }
}
/// In-place power-law decode over a slice: SIMD on 4-wide chunks, scalar
/// fallback on the remainder.
#[cfg(target_arch = "x86_64")]
#[rite]
pub fn gamma_to_linear_slice_v3(token: X64V3Token, values: &mut [f32], gamma: f32) {
    let (chunks, remainder) = values.as_chunks_mut::<4>();
    for chunk in chunks {
        *chunk = gamma_to_linear_core(token, *chunk, gamma);
    }
    // Tail elements go through the scalar reference implementation.
    for v in remainder {
        *v = crate::scalar::gamma_to_linear(*v, gamma);
    }
}
/// In-place power-law encode over a slice: SIMD on 4-wide chunks, scalar
/// fallback on the remainder.
#[cfg(target_arch = "x86_64")]
#[rite]
pub fn linear_to_gamma_slice_v3(token: X64V3Token, values: &mut [f32], gamma: f32) {
    let (chunks, remainder) = values.as_chunks_mut::<4>();
    for chunk in chunks {
        *chunk = linear_to_gamma_core(token, *chunk, gamma);
    }
    // Tail elements go through the scalar reference implementation.
    for v in remainder {
        *v = crate::scalar::linear_to_gamma(*v, gamma);
    }
}
/// AArch64/NEON entry point for [`srgb_to_linear_core`].
#[cfg(target_arch = "aarch64")]
#[rite]
pub fn srgb_to_linear_neon(token: NeonToken, srgb: [f32; 4]) -> [f32; 4] {
    srgb_to_linear_core(token, srgb)
}
/// AArch64/NEON entry point for [`linear_to_srgb_core`].
#[cfg(target_arch = "aarch64")]
#[rite]
pub fn linear_to_srgb_neon(token: NeonToken, linear: [f32; 4]) -> [f32; 4] {
    linear_to_srgb_core(token, linear)
}
/// AArch64/NEON entry point for [`gamma_to_linear_core`].
#[cfg(target_arch = "aarch64")]
#[rite]
pub fn gamma_to_linear_neon(token: NeonToken, encoded: [f32; 4], gamma: f32) -> [f32; 4] {
    gamma_to_linear_core(token, encoded, gamma)
}
/// AArch64/NEON entry point for [`linear_to_gamma_core`].
#[cfg(target_arch = "aarch64")]
#[rite]
pub fn linear_to_gamma_neon(token: NeonToken, linear: [f32; 4], gamma: f32) -> [f32; 4] {
    linear_to_gamma_core(token, linear, gamma)
}
/// In-place sRGB -> linear over a slice (NEON): SIMD on 4-wide chunks,
/// scalar fallback on the remainder.
#[cfg(target_arch = "aarch64")]
#[rite]
pub fn srgb_to_linear_slice_neon(token: NeonToken, values: &mut [f32]) {
    let (chunks, remainder) = values.as_chunks_mut::<4>();
    for chunk in chunks {
        *chunk = srgb_to_linear_core(token, *chunk);
    }
    for v in remainder {
        *v = crate::scalar::srgb_to_linear(*v);
    }
}
/// In-place linear -> sRGB over a slice (NEON): SIMD on 4-wide chunks,
/// scalar fallback on the remainder.
#[cfg(target_arch = "aarch64")]
#[rite]
pub fn linear_to_srgb_slice_neon(token: NeonToken, values: &mut [f32]) {
    let (chunks, remainder) = values.as_chunks_mut::<4>();
    for chunk in chunks {
        *chunk = linear_to_srgb_core(token, *chunk);
    }
    for v in remainder {
        *v = crate::scalar::linear_to_srgb(*v);
    }
}
/// In-place power-law decode over a slice (NEON): SIMD on 4-wide chunks,
/// scalar fallback on the remainder.
#[cfg(target_arch = "aarch64")]
#[rite]
pub fn gamma_to_linear_slice_neon(token: NeonToken, values: &mut [f32], gamma: f32) {
    let (chunks, remainder) = values.as_chunks_mut::<4>();
    for chunk in chunks {
        *chunk = gamma_to_linear_core(token, *chunk, gamma);
    }
    for v in remainder {
        *v = crate::scalar::gamma_to_linear(*v, gamma);
    }
}
/// In-place power-law encode over a slice (NEON): SIMD on 4-wide chunks,
/// scalar fallback on the remainder.
#[cfg(target_arch = "aarch64")]
#[rite]
pub fn linear_to_gamma_slice_neon(token: NeonToken, values: &mut [f32], gamma: f32) {
    let (chunks, remainder) = values.as_chunks_mut::<4>();
    for chunk in chunks {
        *chunk = linear_to_gamma_core(token, *chunk, gamma);
    }
    for v in remainder {
        *v = crate::scalar::linear_to_gamma(*v, gamma);
    }
}
/// wasm32/SIMD128 entry point for [`srgb_to_linear_core`].
#[cfg(target_arch = "wasm32")]
#[rite]
pub fn srgb_to_linear_wasm128(token: Wasm128Token, srgb: [f32; 4]) -> [f32; 4] {
    srgb_to_linear_core(token, srgb)
}
/// wasm32/SIMD128 entry point for [`linear_to_srgb_core`].
#[cfg(target_arch = "wasm32")]
#[rite]
pub fn linear_to_srgb_wasm128(token: Wasm128Token, linear: [f32; 4]) -> [f32; 4] {
    linear_to_srgb_core(token, linear)
}
/// wasm32/SIMD128 entry point for [`gamma_to_linear_core`].
#[cfg(target_arch = "wasm32")]
#[rite]
pub fn gamma_to_linear_wasm128(token: Wasm128Token, encoded: [f32; 4], gamma: f32) -> [f32; 4] {
    gamma_to_linear_core(token, encoded, gamma)
}
/// wasm32/SIMD128 entry point for [`linear_to_gamma_core`].
#[cfg(target_arch = "wasm32")]
#[rite]
pub fn linear_to_gamma_wasm128(token: Wasm128Token, linear: [f32; 4], gamma: f32) -> [f32; 4] {
    linear_to_gamma_core(token, linear, gamma)
}
/// In-place sRGB -> linear over a slice (wasm128): SIMD on 4-wide chunks,
/// scalar fallback on the remainder.
#[cfg(target_arch = "wasm32")]
#[rite]
pub fn srgb_to_linear_slice_wasm128(token: Wasm128Token, values: &mut [f32]) {
    let (chunks, remainder) = values.as_chunks_mut::<4>();
    for chunk in chunks {
        *chunk = srgb_to_linear_core(token, *chunk);
    }
    for v in remainder {
        *v = crate::scalar::srgb_to_linear(*v);
    }
}
/// In-place linear -> sRGB over a slice (wasm128): SIMD on 4-wide chunks,
/// scalar fallback on the remainder.
#[cfg(target_arch = "wasm32")]
#[rite]
pub fn linear_to_srgb_slice_wasm128(token: Wasm128Token, values: &mut [f32]) {
    let (chunks, remainder) = values.as_chunks_mut::<4>();
    for chunk in chunks {
        *chunk = linear_to_srgb_core(token, *chunk);
    }
    for v in remainder {
        *v = crate::scalar::linear_to_srgb(*v);
    }
}
/// In-place power-law decode over a slice (wasm128): SIMD on 4-wide chunks,
/// scalar fallback on the remainder.
#[cfg(target_arch = "wasm32")]
#[rite]
pub fn gamma_to_linear_slice_wasm128(token: Wasm128Token, values: &mut [f32], gamma: f32) {
    let (chunks, remainder) = values.as_chunks_mut::<4>();
    for chunk in chunks {
        *chunk = gamma_to_linear_core(token, *chunk, gamma);
    }
    for v in remainder {
        *v = crate::scalar::gamma_to_linear(*v, gamma);
    }
}
/// In-place power-law encode over a slice (wasm128): SIMD on 4-wide chunks,
/// scalar fallback on the remainder.
#[cfg(target_arch = "wasm32")]
#[rite]
pub fn linear_to_gamma_slice_wasm128(token: Wasm128Token, values: &mut [f32], gamma: f32) {
    let (chunks, remainder) = values.as_chunks_mut::<4>();
    for chunk in chunks {
        *chunk = linear_to_gamma_core(token, *chunk, gamma);
    }
    for v in remainder {
        *v = crate::scalar::linear_to_gamma(*v, gamma);
    }
}
// Generates an x86-64 `#[rite]` wrapper that adapts a vector-in/vector-out
// transfer-function kernel (`$inner`, taking and returning `gen_f32x4`) to a
// plain `[f32; 4]` API. Only compiled with the `transfer` feature enabled.
macro_rules! x86_tf_rite {
    ($name:ident, $inner:path) => {
        #[cfg(all(feature = "transfer", target_arch = "x86_64"))]
        #[rite]
        pub fn $name(token: X64V3Token, v: [f32; 4]) -> [f32; 4] {
            $inner(token, gen_f32x4::from_array(token, v)).to_array()
        }
    };
}
// Array-of-4 entry points for each transfer function: sRGB, BT.709, PQ, HLG.
x86_tf_rite!(tf_srgb_to_linear_v3, crate::tf::srgb::srgb_to_linear_x4);
x86_tf_rite!(tf_linear_to_srgb_v3, crate::tf::srgb::linear_to_srgb_x4);
x86_tf_rite!(bt709_to_linear_v3, crate::tf::bt709::bt709_to_linear_x4);
x86_tf_rite!(linear_to_bt709_v3, crate::tf::bt709::linear_to_bt709_x4);
x86_tf_rite!(pq_to_linear_v3, crate::tf::pq::pq_to_linear_x4);
x86_tf_rite!(linear_to_pq_v3, crate::tf::pq::linear_to_pq_x4);
x86_tf_rite!(hlg_to_linear_v3, crate::tf::hlg::hlg_to_linear_x4);
x86_tf_rite!(linear_to_hlg_v3, crate::tf::hlg::linear_to_hlg_x4);
macro_rules! x86_tf_slice_rite {
($name:ident, $rite:ident, $scalar:path) => {
#[cfg(all(feature = "transfer", target_arch = "x86_64"))]
#[rite]
pub fn $name(token: X64V3Token, values: &mut [f32]) {
let (chunks, remainder) = values.as_chunks_mut::<4>();
for chunk in chunks {
*chunk = $rite(token, *chunk);
}
for v in remainder {
*v = $scalar(*v);
}
}
};
}
x86_tf_slice_rite!(
tf_srgb_to_linear_slice_v3,
tf_srgb_to_linear_v3,
crate::tf::srgb_to_linear
);
x86_tf_slice_rite!(
tf_linear_to_srgb_slice_v3,
tf_linear_to_srgb_v3,
crate::tf::linear_to_srgb
);
x86_tf_slice_rite!(
bt709_to_linear_slice_v3,
bt709_to_linear_v3,
crate::tf::bt709_to_linear
);
x86_tf_slice_rite!(
linear_to_bt709_slice_v3,
linear_to_bt709_v3,
crate::tf::linear_to_bt709
);
x86_tf_slice_rite!(
pq_to_linear_slice_v3,
pq_to_linear_v3,
crate::tf::pq_to_linear
);
x86_tf_slice_rite!(
linear_to_pq_slice_v3,
linear_to_pq_v3,
crate::tf::linear_to_pq
);
x86_tf_slice_rite!(
hlg_to_linear_slice_v3,
hlg_to_linear_v3,
crate::tf::hlg_to_linear
);
x86_tf_slice_rite!(
linear_to_hlg_slice_v3,
linear_to_hlg_v3,
crate::tf::linear_to_hlg
);
// NEON counterpart of `x86_tf_rite`: adapts a `gen_f32x4` transfer-function
// kernel to a `[f32; 4]` API, gated on the `transfer` feature.
macro_rules! neon_tf_rite {
    ($name:ident, $inner:path) => {
        #[cfg(all(feature = "transfer", target_arch = "aarch64"))]
        #[rite]
        pub fn $name(token: NeonToken, v: [f32; 4]) -> [f32; 4] {
            $inner(token, gen_f32x4::from_array(token, v)).to_array()
        }
    };
}
// Array-of-4 entry points for each transfer function: sRGB, BT.709, PQ, HLG.
neon_tf_rite!(tf_srgb_to_linear_neon, crate::tf::srgb::srgb_to_linear_x4);
neon_tf_rite!(tf_linear_to_srgb_neon, crate::tf::srgb::linear_to_srgb_x4);
neon_tf_rite!(bt709_to_linear_neon, crate::tf::bt709::bt709_to_linear_x4);
neon_tf_rite!(linear_to_bt709_neon, crate::tf::bt709::linear_to_bt709_x4);
neon_tf_rite!(pq_to_linear_neon, crate::tf::pq::pq_to_linear_x4);
neon_tf_rite!(linear_to_pq_neon, crate::tf::pq::linear_to_pq_x4);
neon_tf_rite!(hlg_to_linear_neon, crate::tf::hlg::hlg_to_linear_x4);
neon_tf_rite!(linear_to_hlg_neon, crate::tf::hlg::linear_to_hlg_x4);
macro_rules! neon_tf_slice_rite {
($name:ident, $rite:ident, $scalar:path) => {
#[cfg(all(feature = "transfer", target_arch = "aarch64"))]
#[rite]
pub fn $name(token: NeonToken, values: &mut [f32]) {
let (chunks, remainder) = values.as_chunks_mut::<4>();
for chunk in chunks {
*chunk = $rite(token, *chunk);
}
for v in remainder {
*v = $scalar(*v);
}
}
};
}
neon_tf_slice_rite!(
tf_srgb_to_linear_slice_neon,
tf_srgb_to_linear_neon,
crate::tf::srgb_to_linear
);
neon_tf_slice_rite!(
tf_linear_to_srgb_slice_neon,
tf_linear_to_srgb_neon,
crate::tf::linear_to_srgb
);
neon_tf_slice_rite!(
bt709_to_linear_slice_neon,
bt709_to_linear_neon,
crate::tf::bt709_to_linear
);
neon_tf_slice_rite!(
linear_to_bt709_slice_neon,
linear_to_bt709_neon,
crate::tf::linear_to_bt709
);
neon_tf_slice_rite!(
pq_to_linear_slice_neon,
pq_to_linear_neon,
crate::tf::pq_to_linear
);
neon_tf_slice_rite!(
linear_to_pq_slice_neon,
linear_to_pq_neon,
crate::tf::linear_to_pq
);
neon_tf_slice_rite!(
hlg_to_linear_slice_neon,
hlg_to_linear_neon,
crate::tf::hlg_to_linear
);
neon_tf_slice_rite!(
linear_to_hlg_slice_neon,
linear_to_hlg_neon,
crate::tf::linear_to_hlg
);
macro_rules! wasm_tf_rite {
($name:ident, $inner:path) => {
#[cfg(all(feature = "transfer", target_arch = "wasm32"))]
#[rite]
pub fn $name(token: Wasm128Token, v: [f32; 4]) -> [f32; 4] {
$inner(token, gen_f32x4::from_array(token, v)).to_array()
}
};
}
wasm_tf_rite!(
tf_srgb_to_linear_wasm128,
crate::tf::srgb::srgb_to_linear_x4
);
wasm_tf_rite!(
tf_linear_to_srgb_wasm128,
crate::tf::srgb::linear_to_srgb_x4
);
wasm_tf_rite!(
bt709_to_linear_wasm128,
crate::tf::bt709::bt709_to_linear_x4
);
wasm_tf_rite!(
linear_to_bt709_wasm128,
crate::tf::bt709::linear_to_bt709_x4
);
wasm_tf_rite!(pq_to_linear_wasm128, crate::tf::pq::pq_to_linear_x4);
wasm_tf_rite!(linear_to_pq_wasm128, crate::tf::pq::linear_to_pq_x4);
wasm_tf_rite!(hlg_to_linear_wasm128, crate::tf::hlg::hlg_to_linear_x4);
wasm_tf_rite!(linear_to_hlg_wasm128, crate::tf::hlg::linear_to_hlg_x4);
macro_rules! wasm_tf_slice_rite {
($name:ident, $rite:ident, $scalar:path) => {
#[cfg(all(feature = "transfer", target_arch = "wasm32"))]
#[rite]
pub fn $name(token: Wasm128Token, values: &mut [f32]) {
let (chunks, remainder) = values.as_chunks_mut::<4>();
for chunk in chunks {
*chunk = $rite(token, *chunk);
}
for v in remainder {
*v = $scalar(*v);
}
}
};
}
wasm_tf_slice_rite!(
tf_srgb_to_linear_slice_wasm128,
tf_srgb_to_linear_wasm128,
crate::tf::srgb_to_linear
);
wasm_tf_slice_rite!(
tf_linear_to_srgb_slice_wasm128,
tf_linear_to_srgb_wasm128,
crate::tf::linear_to_srgb
);
wasm_tf_slice_rite!(
bt709_to_linear_slice_wasm128,
bt709_to_linear_wasm128,
crate::tf::bt709_to_linear
);
wasm_tf_slice_rite!(
linear_to_bt709_slice_wasm128,
linear_to_bt709_wasm128,
crate::tf::linear_to_bt709
);
wasm_tf_slice_rite!(
pq_to_linear_slice_wasm128,
pq_to_linear_wasm128,
crate::tf::pq_to_linear
);
wasm_tf_slice_rite!(
linear_to_pq_slice_wasm128,
linear_to_pq_wasm128,
crate::tf::linear_to_pq
);
wasm_tf_slice_rite!(
hlg_to_linear_slice_wasm128,
hlg_to_linear_wasm128,
crate::tf::hlg_to_linear
);
wasm_tf_slice_rite!(
linear_to_hlg_slice_wasm128,
linear_to_hlg_wasm128,
crate::tf::linear_to_hlg
);
#[cfg(test)]
#[cfg(target_arch = "x86_64")]
mod tests_x86 {
    use super::*;
    use archmage::SimdToken;
    #[cfg(not(feature = "std"))]
    use alloc::{vec, vec::Vec};

    // Capability probe: `None` means the running CPU lacks x86-64-v3
    // features, and every test in this module skips itself.
    fn get_token() -> Option<X64V3Token> {
        X64V3Token::try_new()
    }

    // NOTE(review): `#[archmage::arcane]` shims presumably provide the
    // target-feature-enabled call frame needed to invoke `#[rite]` functions
    // from ordinary test code — confirm against archmage's documentation.
    #[archmage::arcane]
    fn call_srgb_to_linear(token: X64V3Token, input: [f32; 4]) -> [f32; 4] {
        srgb_to_linear_v3(token, input)
    }
    #[archmage::arcane]
    fn call_linear_to_srgb(token: X64V3Token, input: [f32; 4]) -> [f32; 4] {
        linear_to_srgb_v3(token, input)
    }
    #[archmage::arcane]
    fn call_srgb_to_linear_slice(token: X64V3Token, values: &mut [f32]) {
        srgb_to_linear_slice_v3(token, values);
    }
    #[archmage::arcane]
    fn call_linear_to_srgb_slice(token: X64V3Token, values: &mut [f32]) {
        linear_to_srgb_slice_v3(token, values);
    }

    // sRGB -> linear -> sRGB should reproduce the input to within 1e-4.
    #[test]
    fn test_x4_srgb_roundtrip() {
        let Some(token) = get_token() else {
            eprintln!("Skipping test: X64V3 not available");
            return;
        };
        let input = [0.0, 0.3, 0.7, 1.0];
        let linear = call_srgb_to_linear(token, input);
        let roundtrip = call_linear_to_srgb(token, linear);
        for (i, (&orig, &rt)) in input.iter().zip(roundtrip.iter()).enumerate() {
            assert!(
                (orig - rt).abs() < 1e-4,
                "roundtrip failed at {}: {} -> {}",
                i,
                orig,
                rt
            );
        }
    }

    // SIMD path should agree with the scalar reference to within 1e-5.
    #[test]
    fn test_x4_matches_scalar() {
        let Some(token) = get_token() else {
            eprintln!("Skipping test: X64V3 not available");
            return;
        };
        let input = [0.0, 0.3, 0.7, 1.0];
        let result = call_srgb_to_linear(token, input);
        for (i, (&got, &inp)) in result.iter().zip(input.iter()).enumerate() {
            let expected = crate::scalar::srgb_to_linear(inp);
            assert!(
                (got - expected).abs() < 1e-5,
                "mismatch at {}: got {}, expected {}",
                i,
                got,
                expected
            );
        }
    }

    // SIMD path should match the scalar rational-polynomial approximation to
    // within 1e-6. NOTE(review): the name says "x8" but the comparison is
    // against `rational_poly::srgb_to_linear_fast` — consider renaming.
    #[test]
    fn test_x4_matches_x8() {
        let Some(token) = get_token() else {
            eprintln!("Skipping test: X64V3 not available");
            return;
        };
        let input = [0.1, 0.4, 0.7, 0.95];
        let x4_result = call_srgb_to_linear(token, input);
        for (i, (&got, &inp)) in x4_result.iter().zip(input.iter()).enumerate() {
            let expected = crate::rational_poly::srgb_to_linear_fast(inp);
            assert!(
                (got - expected).abs() < 1e-6,
                "x4 vs rational_poly mismatch at {}: got {}, expected {}",
                i,
                got,
                expected
            );
        }
    }

    // Slice round-trip over 100 elements, which exercises both the 4-wide
    // chunk loop and the scalar remainder path.
    #[test]
    fn test_slice_roundtrip() {
        let Some(token) = get_token() else {
            eprintln!("Skipping test: X64V3 not available");
            return;
        };
        let mut values: Vec<f32> = (0..100).map(|i| i as f32 / 99.0).collect();
        let original = values.clone();
        call_srgb_to_linear_slice(token, &mut values);
        call_linear_to_srgb_slice(token, &mut values);
        for (i, (&orig, &conv)) in original.iter().zip(values.iter()).enumerate() {
            assert!(
                (orig - conv).abs() < 1e-4,
                "roundtrip failed at {}: {} -> {}",
                i,
                orig,
                conv
            );
        }
    }
}
#[cfg(test)]
#[cfg(target_arch = "aarch64")]
mod tests_aarch64 {
    use super::*;
    use archmage::SimdToken;
    #[cfg(not(feature = "std"))]
    use alloc::{vec, vec::Vec};

    // Capability probe: `None` means NEON is unavailable and the tests skip.
    fn get_token() -> Option<NeonToken> {
        NeonToken::try_new()
    }

    // NOTE(review): `#[archmage::arcane]` shims presumably provide the
    // target-feature-enabled call frame needed to invoke `#[rite]` functions
    // from ordinary test code — confirm against archmage's documentation.
    #[archmage::arcane]
    fn call_srgb_to_linear(token: NeonToken, input: [f32; 4]) -> [f32; 4] {
        srgb_to_linear_neon(token, input)
    }
    #[archmage::arcane]
    fn call_linear_to_srgb(token: NeonToken, input: [f32; 4]) -> [f32; 4] {
        linear_to_srgb_neon(token, input)
    }
    #[archmage::arcane]
    fn call_srgb_to_linear_slice(token: NeonToken, values: &mut [f32]) {
        srgb_to_linear_slice_neon(token, values);
    }
    #[archmage::arcane]
    fn call_linear_to_srgb_slice(token: NeonToken, values: &mut [f32]) {
        linear_to_srgb_slice_neon(token, values);
    }

    // sRGB -> linear -> sRGB should reproduce the input to within 1e-4.
    #[test]
    fn test_x4_srgb_roundtrip() {
        let Some(token) = get_token() else {
            eprintln!("Skipping test: NEON not available");
            return;
        };
        let input = [0.0, 0.3, 0.7, 1.0];
        let linear = call_srgb_to_linear(token, input);
        let roundtrip = call_linear_to_srgb(token, linear);
        for (i, (&orig, &rt)) in input.iter().zip(roundtrip.iter()).enumerate() {
            assert!(
                (orig - rt).abs() < 1e-4,
                "roundtrip failed at {}: {} -> {}",
                i,
                orig,
                rt
            );
        }
    }

    // SIMD path should agree with the scalar reference to within 1e-5.
    #[test]
    fn test_x4_matches_scalar() {
        let Some(token) = get_token() else {
            eprintln!("Skipping test: NEON not available");
            return;
        };
        let input = [0.0, 0.3, 0.7, 1.0];
        let result = call_srgb_to_linear(token, input);
        for (i, (&got, &inp)) in result.iter().zip(input.iter()).enumerate() {
            let expected = crate::scalar::srgb_to_linear(inp);
            assert!(
                (got - expected).abs() < 1e-5,
                "mismatch at {}: got {}, expected {}",
                i,
                got,
                expected
            );
        }
    }

    // Slice round-trip over 100 elements, which exercises both the 4-wide
    // chunk loop and the scalar remainder path.
    #[test]
    fn test_slice_roundtrip() {
        let Some(token) = get_token() else {
            eprintln!("Skipping test: NEON not available");
            return;
        };
        let mut values: Vec<f32> = (0..100).map(|i| i as f32 / 99.0).collect();
        let original = values.clone();
        call_srgb_to_linear_slice(token, &mut values);
        call_linear_to_srgb_slice(token, &mut values);
        for (i, (&orig, &conv)) in original.iter().zip(values.iter()).enumerate() {
            assert!(
                (orig - conv).abs() < 1e-4,
                "roundtrip failed at {}: {} -> {}",
                i,
                orig,
                conv
            );
        }
    }
}