use super::super::*;
/// Row widths iterated by the tests in this file.
///
/// The values sit on and around multiples of 8 and 16 (15/16/17, 31/32/33, 47/48, 64/65)
/// plus a few small and large sizes — presumably to cover whole SIMD chunks as well as
/// ragged tails; the kernels' actual chunk size is not visible here (TODO confirm).
const WIDTHS: &[usize] = &[
1, 4, 5, 7, 8, 9, 15, 16, 17, 24, 31, 32, 33, 47, 48, 64, 65, 128, 130,
];
/// Fills `out` with a deterministic pseudo-random sequence derived from `seed`.
///
/// A 32-bit linear congruential generator (multiplier 1_664_525, increment
/// 1_013_904_223) is stepped once per element; the top 24 bits of each state are
/// normalised to `[0, 1)` and then mapped to approximately `[-0.1, 1.3]`, so the
/// samples fall below 0.0, inside `[0, 1]`, and above 1.0.
fn prng_f32(out: &mut [f32], seed: u32) {
  // `s >> 8` is a 24-bit integer, so 2^24 is the matching normalisation constant.
  const SCALE: f32 = (1u32 << 24) as f32;
  let mut s = seed;
  for v in out.iter_mut() {
    s = s.wrapping_mul(1_664_525).wrapping_add(1_013_904_223);
    // Dividing the 24-bit value by `u32::MAX` (as before) capped the unit sample at
    // 1/256, which kept every output in roughly [-0.1, -0.0945] — i.e. all negative.
    *v = ((s >> 8) as f32) / SCALE * 1.4 - 0.1;
  }
}
/// Fills `out` with the `prng_f32` sequence for `seed`, each sample narrowed to `f16`
/// via `half::f16::from_f32`.
fn prng_f16(out: &mut [half::f16], seed: u32) {
  // Generate at f32 precision first, then narrow element by element.
  let mut wide = std::vec![0.0f32; out.len()];
  prng_f32(&mut wide, seed);
  out
    .iter_mut()
    .zip(wide.iter().copied())
    .for_each(|(dst, src)| *dst = half::f16::from_f32(src));
}
/// Writes per-channel ramps with distinct slopes: for index `n`, `g[n] = 2(n+1)/255`,
/// `b[n] = 3(n+1)/255` and `r[n] = (n+1)/255`.
///
/// The loop length is taken from `g`; `b` and `r` are indexed and therefore panic if
/// they are shorter than `g`.
fn asym_ramp_f32(g: &mut [f32], b: &mut [f32], r: &mut [f32]) {
  for (n, green) in g.iter_mut().enumerate() {
    // 1-based position so that the first pixel is already non-zero.
    let k = n + 1;
    *green = (k * 2) as f32 / 255.0;
    b[n] = (k * 3) as f32 / 255.0;
    r[n] = k as f32 / 255.0;
  }
}
/// Four-plane ramp: for index `n`, `g[n] = 2(n+1)/255`, `b[n] = 3(n+1)/255`,
/// `r[n] = (n+1)/255` and `a[n] = 4(n+1)/255`.
///
/// The loop length is taken from `g`; the other planes are indexed and therefore panic
/// if they are shorter than `g`.
fn asym_ramp_f32_a(g: &mut [f32], b: &mut [f32], r: &mut [f32], a: &mut [f32]) {
  for (n, green) in g.iter_mut().enumerate() {
    // 1-based position so that the first pixel is already non-zero.
    let k = n + 1;
    *green = (k * 2) as f32 / 255.0;
    b[n] = (k * 3) as f32 / 255.0;
    r[n] = k as f32 / 255.0;
    a[n] = (k * 4) as f32 / 255.0;
  }
}
/// `f16` counterpart of the three-plane ramp: for index `n`, the f32 values `2(n+1)/255`,
/// `3(n+1)/255` and `(n+1)/255` are narrowed with `half::f16::from_f32` and stored in
/// `g`, `b` and `r` respectively. The loop length is taken from `g`.
fn asym_ramp_f16(g: &mut [half::f16], b: &mut [half::f16], r: &mut [half::f16]) {
  // Numerator over 255, computed in f32 and then narrowed.
  let level = |numerator: usize| half::f16::from_f32(numerator as f32 / 255.0);
  for (n, green) in g.iter_mut().enumerate() {
    let k = n + 1;
    *green = level(k * 2);
    b[n] = level(k * 3);
    r[n] = level(k);
  }
}
/// `f16` counterpart of the four-plane ramp: for index `n`, the f32 values `2(n+1)/255`,
/// `3(n+1)/255`, `(n+1)/255` and `4(n+1)/255` are narrowed with `half::f16::from_f32`
/// and stored in `g`, `b`, `r` and `a` respectively. The loop length is taken from `g`.
fn asym_ramp_f16_a(
  g: &mut [half::f16],
  b: &mut [half::f16],
  r: &mut [half::f16],
  a: &mut [half::f16],
) {
  // Numerator over 255, computed in f32 and then narrowed.
  let level = |numerator: usize| half::f16::from_f32(numerator as f32 / 255.0);
  for (n, green) in g.iter_mut().enumerate() {
    let k = n + 1;
    *green = level(k * 2);
    b[n] = level(k * 3);
    r[n] = level(k);
    a[n] = level(k * 4);
  }
}
/// Checks that the AVX-512 `gbrpf32_to_rgb_row::<false>` kernel and the scalar reference
/// produce identical `u8` output on pseudo-random planes, for every width in `WIDTHS`.
/// Returns without asserting when the CPU does not report `avx512bw`.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf32_to_rgb_matches_scalar() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw");
  if !cpu_ok {
    return;
  }
  for w in WIDTHS.iter().copied() {
    let zeros = || std::vec![0.0f32; w];
    let (mut green, mut blue, mut red) = (zeros(), zeros(), zeros());
    prng_f32(&mut green, 0xC001_0001);
    prng_f32(&mut blue, 0xC001_0002);
    prng_f32(&mut red, 0xC001_0003);
    let out_len = w * 3;
    let mut got = std::vec![0u8; out_len];
    let mut want = std::vec![0u8; out_len];
    // SAFETY: avx512bw was detected above; planes hold `w` samples and the output
    // `w * 3`, mirroring the scalar call (the kernel's full contract lives elsewhere).
    unsafe { gbrpf32_to_rgb_row::<false>(&green, &blue, &red, &mut got, w) };
    scalar::planar_gbr_float::gbrpf32_to_rgb_row::<false>(&green, &blue, &red, &mut want, w);
    assert_eq!(got, want, "gbrpf32_to_rgb width={w}");
  }
}
/// Feeds per-channel ramps with distinct slopes through the AVX-512 and scalar
/// `gbrpf32_to_rgb_row::<false>` paths and requires identical output, so a swapped or
/// misplaced channel shows up as a mismatch. Widths above 60 are skipped.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf32_to_rgb_lane_order() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw");
  if !cpu_ok {
    return;
  }
  for w in WIDTHS.iter().copied().filter(|&w| w <= 60) {
    let zeros = || std::vec![0.0f32; w];
    let (mut green, mut blue, mut red) = (zeros(), zeros(), zeros());
    asym_ramp_f32(&mut green, &mut blue, &mut red);
    let out_len = w * 3;
    let mut got = std::vec![0u8; out_len];
    let mut want = std::vec![0u8; out_len];
    // SAFETY: avx512bw was detected above; planes hold `w` samples and the output
    // `w * 3`, mirroring the scalar call (the kernel's full contract lives elsewhere).
    unsafe { gbrpf32_to_rgb_row::<false>(&green, &blue, &red, &mut got, w) };
    scalar::planar_gbr_float::gbrpf32_to_rgb_row::<false>(&green, &blue, &red, &mut want, w);
    assert_eq!(got, want, "gbrpf32_to_rgb lane-order width={w}");
  }
}
/// Checks that the AVX-512 `gbrpf32_to_rgba_row::<false>` kernel and the scalar reference
/// produce identical `u8` output (4 bytes per pixel) on pseudo-random planes, for every
/// width in `WIDTHS`. Returns without asserting when `avx512bw` is not reported.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf32_to_rgba_matches_scalar() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw");
  if !cpu_ok {
    return;
  }
  for w in WIDTHS.iter().copied() {
    let zeros = || std::vec![0.0f32; w];
    let (mut green, mut blue, mut red) = (zeros(), zeros(), zeros());
    prng_f32(&mut green, 0xC002_0001);
    prng_f32(&mut blue, 0xC002_0002);
    prng_f32(&mut red, 0xC002_0003);
    let out_len = w * 4;
    let mut got = std::vec![0u8; out_len];
    let mut want = std::vec![0u8; out_len];
    // SAFETY: avx512bw was detected above; planes hold `w` samples and the output
    // `w * 4`, mirroring the scalar call (the kernel's full contract lives elsewhere).
    unsafe { gbrpf32_to_rgba_row::<false>(&green, &blue, &red, &mut got, w) };
    scalar::planar_gbr_float::gbrpf32_to_rgba_row::<false>(&green, &blue, &red, &mut want, w);
    assert_eq!(got, want, "gbrpf32_to_rgba width={w}");
  }
}
/// Feeds per-channel ramps with distinct slopes through the AVX-512 and scalar
/// `gbrpf32_to_rgba_row::<false>` paths and requires identical output, so a swapped or
/// misplaced channel shows up as a mismatch. Widths above 60 are skipped.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf32_to_rgba_lane_order() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw");
  if !cpu_ok {
    return;
  }
  for w in WIDTHS.iter().copied().filter(|&w| w <= 60) {
    let zeros = || std::vec![0.0f32; w];
    let (mut green, mut blue, mut red) = (zeros(), zeros(), zeros());
    asym_ramp_f32(&mut green, &mut blue, &mut red);
    let out_len = w * 4;
    let mut got = std::vec![0u8; out_len];
    let mut want = std::vec![0u8; out_len];
    // SAFETY: avx512bw was detected above; planes hold `w` samples and the output
    // `w * 4`, mirroring the scalar call (the kernel's full contract lives elsewhere).
    unsafe { gbrpf32_to_rgba_row::<false>(&green, &blue, &red, &mut got, w) };
    scalar::planar_gbr_float::gbrpf32_to_rgba_row::<false>(&green, &blue, &red, &mut want, w);
    assert_eq!(got, want, "gbrpf32_to_rgba lane-order width={w}");
  }
}
/// Checks that the AVX-512 `gbrpf32_to_rgb_u16_row::<false>` kernel and the scalar
/// reference produce identical `u16` output on pseudo-random planes, for every width in
/// `WIDTHS`. Returns without asserting when `avx512bw` is not reported.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf32_to_rgb_u16_matches_scalar() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw");
  if !cpu_ok {
    return;
  }
  for w in WIDTHS.iter().copied() {
    let zeros = || std::vec![0.0f32; w];
    let (mut green, mut blue, mut red) = (zeros(), zeros(), zeros());
    prng_f32(&mut green, 0xC003_0001);
    prng_f32(&mut blue, 0xC003_0002);
    prng_f32(&mut red, 0xC003_0003);
    let out_len = w * 3;
    let mut got = std::vec![0u16; out_len];
    let mut want = std::vec![0u16; out_len];
    // SAFETY: avx512bw was detected above; planes hold `w` samples and the output
    // `w * 3`, mirroring the scalar call (the kernel's full contract lives elsewhere).
    unsafe { gbrpf32_to_rgb_u16_row::<false>(&green, &blue, &red, &mut got, w) };
    scalar::planar_gbr_float::gbrpf32_to_rgb_u16_row::<false>(
      &green, &blue, &red, &mut want, w,
    );
    assert_eq!(got, want, "gbrpf32_to_rgb_u16 width={w}");
  }
}
/// Feeds per-channel ramps with distinct slopes through the AVX-512 and scalar
/// `gbrpf32_to_rgb_u16_row::<false>` paths and requires identical output, so a swapped
/// or misplaced channel shows up as a mismatch. Widths above 60 are skipped.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf32_to_rgb_u16_lane_order() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw");
  if !cpu_ok {
    return;
  }
  for w in WIDTHS.iter().copied().filter(|&w| w <= 60) {
    let zeros = || std::vec![0.0f32; w];
    let (mut green, mut blue, mut red) = (zeros(), zeros(), zeros());
    asym_ramp_f32(&mut green, &mut blue, &mut red);
    let out_len = w * 3;
    let mut got = std::vec![0u16; out_len];
    let mut want = std::vec![0u16; out_len];
    // SAFETY: avx512bw was detected above; planes hold `w` samples and the output
    // `w * 3`, mirroring the scalar call (the kernel's full contract lives elsewhere).
    unsafe { gbrpf32_to_rgb_u16_row::<false>(&green, &blue, &red, &mut got, w) };
    scalar::planar_gbr_float::gbrpf32_to_rgb_u16_row::<false>(
      &green, &blue, &red, &mut want, w,
    );
    assert_eq!(got, want, "gbrpf32_to_rgb_u16 lane-order width={w}");
  }
}
/// Checks that the AVX-512 `gbrpf32_to_rgba_u16_row::<false>` kernel and the scalar
/// reference produce identical `u16` output (4 values per pixel) on pseudo-random planes,
/// for every width in `WIDTHS`. Returns without asserting when `avx512bw` is not reported.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf32_to_rgba_u16_matches_scalar() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw");
  if !cpu_ok {
    return;
  }
  for w in WIDTHS.iter().copied() {
    let zeros = || std::vec![0.0f32; w];
    let (mut green, mut blue, mut red) = (zeros(), zeros(), zeros());
    prng_f32(&mut green, 0xC004_0001);
    prng_f32(&mut blue, 0xC004_0002);
    prng_f32(&mut red, 0xC004_0003);
    let out_len = w * 4;
    let mut got = std::vec![0u16; out_len];
    let mut want = std::vec![0u16; out_len];
    // SAFETY: avx512bw was detected above; planes hold `w` samples and the output
    // `w * 4`, mirroring the scalar call (the kernel's full contract lives elsewhere).
    unsafe { gbrpf32_to_rgba_u16_row::<false>(&green, &blue, &red, &mut got, w) };
    scalar::planar_gbr_float::gbrpf32_to_rgba_u16_row::<false>(
      &green, &blue, &red, &mut want, w,
    );
    assert_eq!(got, want, "gbrpf32_to_rgba_u16 width={w}");
  }
}
/// Feeds per-channel ramps with distinct slopes through the AVX-512 and scalar
/// `gbrpf32_to_rgba_u16_row::<false>` paths and requires identical output, so a swapped
/// or misplaced channel shows up as a mismatch. Widths above 60 are skipped.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf32_to_rgba_u16_lane_order() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw");
  if !cpu_ok {
    return;
  }
  for w in WIDTHS.iter().copied().filter(|&w| w <= 60) {
    let zeros = || std::vec![0.0f32; w];
    let (mut green, mut blue, mut red) = (zeros(), zeros(), zeros());
    asym_ramp_f32(&mut green, &mut blue, &mut red);
    let out_len = w * 4;
    let mut got = std::vec![0u16; out_len];
    let mut want = std::vec![0u16; out_len];
    // SAFETY: avx512bw was detected above; planes hold `w` samples and the output
    // `w * 4`, mirroring the scalar call (the kernel's full contract lives elsewhere).
    unsafe { gbrpf32_to_rgba_u16_row::<false>(&green, &blue, &red, &mut got, w) };
    scalar::planar_gbr_float::gbrpf32_to_rgba_u16_row::<false>(
      &green, &blue, &red, &mut want, w,
    );
    assert_eq!(got, want, "gbrpf32_to_rgba_u16 lane-order width={w}");
  }
}
/// Checks that the AVX-512 `gbrpf32_to_rgb_f32_row::<false>` kernel and the scalar
/// reference produce exactly equal `f32` output on pseudo-random planes, for every width
/// in `WIDTHS`. Returns without asserting when `avx512bw` is not reported.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf32_to_rgb_f32_matches_scalar() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw");
  if !cpu_ok {
    return;
  }
  for w in WIDTHS.iter().copied() {
    let zeros = || std::vec![0.0f32; w];
    let (mut green, mut blue, mut red) = (zeros(), zeros(), zeros());
    prng_f32(&mut green, 0xC005_0001);
    prng_f32(&mut blue, 0xC005_0002);
    prng_f32(&mut red, 0xC005_0003);
    let out_len = w * 3;
    let mut got = std::vec![0.0f32; out_len];
    let mut want = std::vec![0.0f32; out_len];
    // SAFETY: avx512bw was detected above; planes hold `w` samples and the output
    // `w * 3`, mirroring the scalar call (the kernel's full contract lives elsewhere).
    unsafe { gbrpf32_to_rgb_f32_row::<false>(&green, &blue, &red, &mut got, w) };
    scalar::planar_gbr_float::gbrpf32_to_rgb_f32_row::<false>(
      &green, &blue, &red, &mut want, w,
    );
    assert_eq!(got, want, "gbrpf32_to_rgb_f32 width={w}");
  }
}
/// Checks that the AVX-512 `gbrpf32_to_rgba_f32_row::<false>` kernel and the scalar
/// reference produce exactly equal `f32` output (4 values per pixel) on pseudo-random
/// planes, for every width in `WIDTHS`. Returns without asserting when `avx512bw` is
/// not reported.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf32_to_rgba_f32_matches_scalar() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw");
  if !cpu_ok {
    return;
  }
  for w in WIDTHS.iter().copied() {
    let zeros = || std::vec![0.0f32; w];
    let (mut green, mut blue, mut red) = (zeros(), zeros(), zeros());
    prng_f32(&mut green, 0xC006_0001);
    prng_f32(&mut blue, 0xC006_0002);
    prng_f32(&mut red, 0xC006_0003);
    let out_len = w * 4;
    let mut got = std::vec![0.0f32; out_len];
    let mut want = std::vec![0.0f32; out_len];
    // SAFETY: avx512bw was detected above; planes hold `w` samples and the output
    // `w * 4`, mirroring the scalar call (the kernel's full contract lives elsewhere).
    unsafe { gbrpf32_to_rgba_f32_row::<false>(&green, &blue, &red, &mut got, w) };
    scalar::planar_gbr_float::gbrpf32_to_rgba_f32_row::<false>(
      &green, &blue, &red, &mut want, w,
    );
    assert_eq!(got, want, "gbrpf32_to_rgba_f32 width={w}");
  }
}
/// Checks that the AVX-512 + F16C `gbrpf32_to_rgb_f16_row_f16c::<false>` kernel and the
/// scalar `gbrpf32_to_rgb_f16_row` reference produce equal `f16` output on pseudo-random
/// planes, for every width in `WIDTHS`. Returns without asserting unless both `avx512bw`
/// and `f16c` are reported.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 + F16C SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf32_to_rgb_f16_f16c_matches_scalar() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw")
    && std::arch::is_x86_feature_detected!("f16c");
  if !cpu_ok {
    return;
  }
  for w in WIDTHS.iter().copied() {
    let zeros = || std::vec![0.0f32; w];
    let (mut green, mut blue, mut red) = (zeros(), zeros(), zeros());
    prng_f32(&mut green, 0xC007_0001);
    prng_f32(&mut blue, 0xC007_0002);
    prng_f32(&mut red, 0xC007_0003);
    let out_len = w * 3;
    let mut got = std::vec![half::f16::ZERO; out_len];
    let mut want = std::vec![half::f16::ZERO; out_len];
    // SAFETY: avx512bw and f16c were detected above; planes hold `w` samples and the
    // output `w * 3`, mirroring the scalar call (full kernel contract lives elsewhere).
    unsafe { gbrpf32_to_rgb_f16_row_f16c::<false>(&green, &blue, &red, &mut got, w) };
    scalar::planar_gbr_float::gbrpf32_to_rgb_f16_row::<false>(
      &green, &blue, &red, &mut want, w,
    );
    assert_eq!(got, want, "gbrpf32_to_rgb_f16 (F16C) width={w}");
  }
}
/// Feeds per-channel ramps with distinct slopes through the AVX-512 + F16C
/// `gbrpf32_to_rgb_f16_row_f16c::<false>` kernel and the scalar reference and requires
/// equal output, so a swapped or misplaced channel shows up as a mismatch. Widths above
/// 60 are skipped.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 + F16C SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf32_to_rgb_f16_lane_order() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw")
    && std::arch::is_x86_feature_detected!("f16c");
  if !cpu_ok {
    return;
  }
  for w in WIDTHS.iter().copied().filter(|&w| w <= 60) {
    let zeros = || std::vec![0.0f32; w];
    let (mut green, mut blue, mut red) = (zeros(), zeros(), zeros());
    asym_ramp_f32(&mut green, &mut blue, &mut red);
    let out_len = w * 3;
    let mut got = std::vec![half::f16::ZERO; out_len];
    let mut want = std::vec![half::f16::ZERO; out_len];
    // SAFETY: avx512bw and f16c were detected above; planes hold `w` samples and the
    // output `w * 3`, mirroring the scalar call (full kernel contract lives elsewhere).
    unsafe { gbrpf32_to_rgb_f16_row_f16c::<false>(&green, &blue, &red, &mut got, w) };
    scalar::planar_gbr_float::gbrpf32_to_rgb_f16_row::<false>(
      &green, &blue, &red, &mut want, w,
    );
    assert_eq!(got, want, "gbrpf32_to_rgb_f16 lane-order width={w}");
  }
}
/// Checks that the AVX-512 + F16C `gbrpf32_to_rgba_f16_row_f16c::<false>` kernel and the
/// scalar `gbrpf32_to_rgba_f16_row` reference produce equal `f16` output (4 values per
/// pixel) on pseudo-random planes, for every width in `WIDTHS`. Returns without asserting
/// unless both `avx512bw` and `f16c` are reported.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 + F16C SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf32_to_rgba_f16_f16c_matches_scalar() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw")
    && std::arch::is_x86_feature_detected!("f16c");
  if !cpu_ok {
    return;
  }
  for w in WIDTHS.iter().copied() {
    let zeros = || std::vec![0.0f32; w];
    let (mut green, mut blue, mut red) = (zeros(), zeros(), zeros());
    prng_f32(&mut green, 0xC008_0001);
    prng_f32(&mut blue, 0xC008_0002);
    prng_f32(&mut red, 0xC008_0003);
    let out_len = w * 4;
    let mut got = std::vec![half::f16::ZERO; out_len];
    let mut want = std::vec![half::f16::ZERO; out_len];
    // SAFETY: avx512bw and f16c were detected above; planes hold `w` samples and the
    // output `w * 4`, mirroring the scalar call (full kernel contract lives elsewhere).
    unsafe { gbrpf32_to_rgba_f16_row_f16c::<false>(&green, &blue, &red, &mut got, w) };
    scalar::planar_gbr_float::gbrpf32_to_rgba_f16_row::<false>(
      &green, &blue, &red, &mut want, w,
    );
    assert_eq!(got, want, "gbrpf32_to_rgba_f16 (F16C) width={w}");
  }
}
/// Feeds per-channel ramps with distinct slopes through the AVX-512 + F16C
/// `gbrpf32_to_rgba_f16_row_f16c::<false>` kernel and the scalar reference and requires
/// equal output, so a swapped or misplaced channel shows up as a mismatch. Widths above
/// 60 are skipped.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 + F16C SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf32_to_rgba_f16_lane_order() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw")
    && std::arch::is_x86_feature_detected!("f16c");
  if !cpu_ok {
    return;
  }
  for w in WIDTHS.iter().copied().filter(|&w| w <= 60) {
    let zeros = || std::vec![0.0f32; w];
    let (mut green, mut blue, mut red) = (zeros(), zeros(), zeros());
    asym_ramp_f32(&mut green, &mut blue, &mut red);
    let out_len = w * 4;
    let mut got = std::vec![half::f16::ZERO; out_len];
    let mut want = std::vec![half::f16::ZERO; out_len];
    // SAFETY: avx512bw and f16c were detected above; planes hold `w` samples and the
    // output `w * 4`, mirroring the scalar call (full kernel contract lives elsewhere).
    unsafe { gbrpf32_to_rgba_f16_row_f16c::<false>(&green, &blue, &red, &mut got, w) };
    scalar::planar_gbr_float::gbrpf32_to_rgba_f16_row::<false>(
      &green, &blue, &red, &mut want, w,
    );
    assert_eq!(got, want, "gbrpf32_to_rgba_f16 lane-order width={w}");
  }
}
/// Checks that the AVX-512 `gbrpf32_to_luma_row::<false>` kernel and the scalar reference
/// produce identical `u8` luma (one byte per pixel) on pseudo-random planes, for every
/// width in `WIDTHS`. Both paths are called with `ColorMatrix::Bt709` and `true` as the
/// trailing arguments. Returns without asserting when `avx512bw` is not reported.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf32_to_luma_matches_scalar() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw");
  if !cpu_ok {
    return;
  }
  for w in WIDTHS.iter().copied() {
    let zeros = || std::vec![0.0f32; w];
    let (mut green, mut blue, mut red) = (zeros(), zeros(), zeros());
    prng_f32(&mut green, 0xC009_0001);
    prng_f32(&mut blue, 0xC009_0002);
    prng_f32(&mut red, 0xC009_0003);
    let mut got = std::vec![0u8; w];
    let mut want = std::vec![0u8; w];
    // SAFETY: avx512bw was detected above; every plane and the output hold `w` elements,
    // mirroring the scalar call (the kernel's full contract lives elsewhere).
    unsafe {
      gbrpf32_to_luma_row::<false>(
        &green,
        &blue,
        &red,
        &mut got,
        w,
        crate::ColorMatrix::Bt709,
        true,
      )
    };
    scalar::planar_gbr_float::gbrpf32_to_luma_row::<false>(
      &green,
      &blue,
      &red,
      &mut want,
      w,
      crate::ColorMatrix::Bt709,
      true,
    );
    assert_eq!(got, want, "gbrpf32_to_luma width={w}");
  }
}
/// Checks that the AVX-512 `gbrpf32_to_luma_u16_row::<false>` kernel and the scalar
/// reference produce identical `u16` luma (one value per pixel) on pseudo-random planes,
/// for every width in `WIDTHS`. Both paths are called with `ColorMatrix::Bt709` and
/// `true` as the trailing arguments. Returns without asserting when `avx512bw` is not
/// reported.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf32_to_luma_u16_matches_scalar() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw");
  if !cpu_ok {
    return;
  }
  for w in WIDTHS.iter().copied() {
    let zeros = || std::vec![0.0f32; w];
    let (mut green, mut blue, mut red) = (zeros(), zeros(), zeros());
    prng_f32(&mut green, 0xC00A_0001);
    prng_f32(&mut blue, 0xC00A_0002);
    prng_f32(&mut red, 0xC00A_0003);
    let mut got = std::vec![0u16; w];
    let mut want = std::vec![0u16; w];
    // SAFETY: avx512bw was detected above; every plane and the output hold `w` elements,
    // mirroring the scalar call (the kernel's full contract lives elsewhere).
    unsafe {
      gbrpf32_to_luma_u16_row::<false>(
        &green,
        &blue,
        &red,
        &mut got,
        w,
        crate::ColorMatrix::Bt709,
        true,
      )
    };
    scalar::planar_gbr_float::gbrpf32_to_luma_u16_row::<false>(
      &green,
      &blue,
      &red,
      &mut want,
      w,
      crate::ColorMatrix::Bt709,
      true,
    );
    assert_eq!(got, want, "gbrpf32_to_luma_u16 width={w}");
  }
}
/// Checks that the AVX-512 `gbrpf32_to_hsv_row::<false>` kernel and the scalar reference
/// produce identical H, S and V byte planes on pseudo-random input, for every width in
/// `WIDTHS`. Each plane is asserted separately. Returns without asserting when
/// `avx512bw` is not reported.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf32_to_hsv_matches_scalar() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw");
  if !cpu_ok {
    return;
  }
  for w in WIDTHS.iter().copied() {
    let zeros = || std::vec![0.0f32; w];
    let (mut green, mut blue, mut red) = (zeros(), zeros(), zeros());
    prng_f32(&mut green, 0xC00B_0001);
    prng_f32(&mut blue, 0xC00B_0002);
    prng_f32(&mut red, 0xC00B_0003);
    let bytes = || std::vec![0u8; w];
    let (mut got_h, mut got_s, mut got_v) = (bytes(), bytes(), bytes());
    let (mut want_h, mut want_s, mut want_v) = (bytes(), bytes(), bytes());
    // SAFETY: avx512bw was detected above; every input and output plane holds `w`
    // elements, mirroring the scalar call (the kernel's full contract lives elsewhere).
    unsafe {
      gbrpf32_to_hsv_row::<false>(&green, &blue, &red, &mut got_h, &mut got_s, &mut got_v, w)
    };
    scalar::planar_gbr_float::gbrpf32_to_hsv_row::<false>(
      &green,
      &blue,
      &red,
      &mut want_h,
      &mut want_s,
      &mut want_v,
      w,
    );
    assert_eq!(got_h, want_h, "gbrpf32 hsv H width={w}");
    assert_eq!(got_s, want_s, "gbrpf32 hsv S width={w}");
    assert_eq!(got_v, want_v, "gbrpf32 hsv V width={w}");
  }
}
/// Checks that the AVX-512 `gbrapf32_to_rgba_row::<false>` kernel (four input planes,
/// including alpha) and the scalar reference produce identical `u8` output on
/// pseudo-random planes, for every width in `WIDTHS`. Returns without asserting when
/// `avx512bw` is not reported.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 SIMD intrinsics unsupported by Miri")]
fn avx512_gbrapf32_to_rgba_matches_scalar() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw");
  if !cpu_ok {
    return;
  }
  for w in WIDTHS.iter().copied() {
    let zeros = || std::vec![0.0f32; w];
    let (mut green, mut blue, mut red, mut alpha) = (zeros(), zeros(), zeros(), zeros());
    prng_f32(&mut green, 0xC00C_0001);
    prng_f32(&mut blue, 0xC00C_0002);
    prng_f32(&mut red, 0xC00C_0003);
    prng_f32(&mut alpha, 0xC00C_0004);
    let out_len = w * 4;
    let mut got = std::vec![0u8; out_len];
    let mut want = std::vec![0u8; out_len];
    // SAFETY: avx512bw was detected above; planes hold `w` samples and the output
    // `w * 4`, mirroring the scalar call (the kernel's full contract lives elsewhere).
    unsafe { gbrapf32_to_rgba_row::<false>(&green, &blue, &red, &alpha, &mut got, w) };
    scalar::planar_gbr_float::gbrapf32_to_rgba_row::<false>(
      &green, &blue, &red, &alpha, &mut want, w,
    );
    assert_eq!(got, want, "gbrapf32_to_rgba width={w}");
  }
}
/// Feeds four ramps with distinct slopes (G, B, R, A) through the AVX-512 and scalar
/// `gbrapf32_to_rgba_row::<false>` paths and requires identical output, so a swapped or
/// misplaced channel shows up as a mismatch. Widths above 60 are skipped.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 SIMD intrinsics unsupported by Miri")]
fn avx512_gbrapf32_to_rgba_lane_order() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw");
  if !cpu_ok {
    return;
  }
  for w in WIDTHS.iter().copied().filter(|&w| w <= 60) {
    let zeros = || std::vec![0.0f32; w];
    let (mut green, mut blue, mut red, mut alpha) = (zeros(), zeros(), zeros(), zeros());
    asym_ramp_f32_a(&mut green, &mut blue, &mut red, &mut alpha);
    let out_len = w * 4;
    let mut got = std::vec![0u8; out_len];
    let mut want = std::vec![0u8; out_len];
    // SAFETY: avx512bw was detected above; planes hold `w` samples and the output
    // `w * 4`, mirroring the scalar call (the kernel's full contract lives elsewhere).
    unsafe { gbrapf32_to_rgba_row::<false>(&green, &blue, &red, &alpha, &mut got, w) };
    scalar::planar_gbr_float::gbrapf32_to_rgba_row::<false>(
      &green, &blue, &red, &alpha, &mut want, w,
    );
    assert_eq!(got, want, "gbrapf32_to_rgba lane-order width={w}");
  }
}
/// Checks that the AVX-512 `gbrapf32_to_rgba_u16_row::<false>` kernel (four input planes,
/// including alpha) and the scalar reference produce identical `u16` output on
/// pseudo-random planes, for every width in `WIDTHS`. Returns without asserting when
/// `avx512bw` is not reported.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 SIMD intrinsics unsupported by Miri")]
fn avx512_gbrapf32_to_rgba_u16_matches_scalar() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw");
  if !cpu_ok {
    return;
  }
  for w in WIDTHS.iter().copied() {
    let zeros = || std::vec![0.0f32; w];
    let (mut green, mut blue, mut red, mut alpha) = (zeros(), zeros(), zeros(), zeros());
    prng_f32(&mut green, 0xC00D_0001);
    prng_f32(&mut blue, 0xC00D_0002);
    prng_f32(&mut red, 0xC00D_0003);
    prng_f32(&mut alpha, 0xC00D_0004);
    let out_len = w * 4;
    let mut got = std::vec![0u16; out_len];
    let mut want = std::vec![0u16; out_len];
    // SAFETY: avx512bw was detected above; planes hold `w` samples and the output
    // `w * 4`, mirroring the scalar call (the kernel's full contract lives elsewhere).
    unsafe { gbrapf32_to_rgba_u16_row::<false>(&green, &blue, &red, &alpha, &mut got, w) };
    scalar::planar_gbr_float::gbrapf32_to_rgba_u16_row::<false>(
      &green, &blue, &red, &alpha, &mut want, w,
    );
    assert_eq!(got, want, "gbrapf32_to_rgba_u16 width={w}");
  }
}
/// Feeds four ramps with distinct slopes (G, B, R, A) through the AVX-512 and scalar
/// `gbrapf32_to_rgba_u16_row::<false>` paths and requires identical output, so a swapped
/// or misplaced channel shows up as a mismatch. Widths above 60 are skipped.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 SIMD intrinsics unsupported by Miri")]
fn avx512_gbrapf32_to_rgba_u16_lane_order() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw");
  if !cpu_ok {
    return;
  }
  for w in WIDTHS.iter().copied().filter(|&w| w <= 60) {
    let zeros = || std::vec![0.0f32; w];
    let (mut green, mut blue, mut red, mut alpha) = (zeros(), zeros(), zeros(), zeros());
    asym_ramp_f32_a(&mut green, &mut blue, &mut red, &mut alpha);
    let out_len = w * 4;
    let mut got = std::vec![0u16; out_len];
    let mut want = std::vec![0u16; out_len];
    // SAFETY: avx512bw was detected above; planes hold `w` samples and the output
    // `w * 4`, mirroring the scalar call (the kernel's full contract lives elsewhere).
    unsafe { gbrapf32_to_rgba_u16_row::<false>(&green, &blue, &red, &alpha, &mut got, w) };
    scalar::planar_gbr_float::gbrapf32_to_rgba_u16_row::<false>(
      &green, &blue, &red, &alpha, &mut want, w,
    );
    assert_eq!(got, want, "gbrapf32_to_rgba_u16 lane-order width={w}");
  }
}
/// Checks that the AVX-512 `gbrapf32_to_rgba_f32_row::<false>` kernel (four input planes,
/// including alpha) and the scalar reference produce exactly equal `f32` output on
/// pseudo-random planes, for every width in `WIDTHS`. Returns without asserting when
/// `avx512bw` is not reported.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 SIMD intrinsics unsupported by Miri")]
fn avx512_gbrapf32_to_rgba_f32_matches_scalar() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw");
  if !cpu_ok {
    return;
  }
  for w in WIDTHS.iter().copied() {
    let zeros = || std::vec![0.0f32; w];
    let (mut green, mut blue, mut red, mut alpha) = (zeros(), zeros(), zeros(), zeros());
    prng_f32(&mut green, 0xC00E_0001);
    prng_f32(&mut blue, 0xC00E_0002);
    prng_f32(&mut red, 0xC00E_0003);
    prng_f32(&mut alpha, 0xC00E_0004);
    let out_len = w * 4;
    let mut got = std::vec![0.0f32; out_len];
    let mut want = std::vec![0.0f32; out_len];
    // SAFETY: avx512bw was detected above; planes hold `w` samples and the output
    // `w * 4`, mirroring the scalar call (the kernel's full contract lives elsewhere).
    unsafe { gbrapf32_to_rgba_f32_row::<false>(&green, &blue, &red, &alpha, &mut got, w) };
    scalar::planar_gbr_float::gbrapf32_to_rgba_f32_row::<false>(
      &green, &blue, &red, &alpha, &mut want, w,
    );
    assert_eq!(got, want, "gbrapf32_to_rgba_f32 width={w}");
  }
}
/// Checks that the AVX-512 + F16C `gbrapf32_to_rgba_f16_row_f16c::<false>` kernel (four
/// input planes, including alpha) and the scalar `gbrapf32_to_rgba_f16_row` reference
/// produce equal `f16` output on pseudo-random planes, for every width in `WIDTHS`.
/// Returns without asserting unless both `avx512bw` and `f16c` are reported.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 + F16C SIMD intrinsics unsupported by Miri")]
fn avx512_gbrapf32_to_rgba_f16_f16c_matches_scalar() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw")
    && std::arch::is_x86_feature_detected!("f16c");
  if !cpu_ok {
    return;
  }
  for w in WIDTHS.iter().copied() {
    let zeros = || std::vec![0.0f32; w];
    let (mut green, mut blue, mut red, mut alpha) = (zeros(), zeros(), zeros(), zeros());
    prng_f32(&mut green, 0xC00F_0001);
    prng_f32(&mut blue, 0xC00F_0002);
    prng_f32(&mut red, 0xC00F_0003);
    prng_f32(&mut alpha, 0xC00F_0004);
    let out_len = w * 4;
    let mut got = std::vec![half::f16::ZERO; out_len];
    let mut want = std::vec![half::f16::ZERO; out_len];
    // SAFETY: avx512bw and f16c were detected above; planes hold `w` samples and the
    // output `w * 4`, mirroring the scalar call (full kernel contract lives elsewhere).
    unsafe {
      gbrapf32_to_rgba_f16_row_f16c::<false>(&green, &blue, &red, &alpha, &mut got, w)
    };
    scalar::planar_gbr_float::gbrapf32_to_rgba_f16_row::<false>(
      &green, &blue, &red, &alpha, &mut want, w,
    );
    assert_eq!(got, want, "gbrapf32_to_rgba_f16 (F16C) width={w}");
  }
}
/// Feeds four ramps with distinct slopes (G, B, R, A) through the AVX-512 + F16C
/// `gbrapf32_to_rgba_f16_row_f16c::<false>` kernel and the scalar reference and requires
/// equal output, so a swapped or misplaced channel shows up as a mismatch. Widths above
/// 60 are skipped.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 + F16C SIMD intrinsics unsupported by Miri")]
fn avx512_gbrapf32_to_rgba_f16_lane_order() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw")
    && std::arch::is_x86_feature_detected!("f16c");
  if !cpu_ok {
    return;
  }
  for w in WIDTHS.iter().copied().filter(|&w| w <= 60) {
    let zeros = || std::vec![0.0f32; w];
    let (mut green, mut blue, mut red, mut alpha) = (zeros(), zeros(), zeros(), zeros());
    asym_ramp_f32_a(&mut green, &mut blue, &mut red, &mut alpha);
    let out_len = w * 4;
    let mut got = std::vec![half::f16::ZERO; out_len];
    let mut want = std::vec![half::f16::ZERO; out_len];
    // SAFETY: avx512bw and f16c were detected above; planes hold `w` samples and the
    // output `w * 4`, mirroring the scalar call (full kernel contract lives elsewhere).
    unsafe {
      gbrapf32_to_rgba_f16_row_f16c::<false>(&green, &blue, &red, &alpha, &mut got, w)
    };
    scalar::planar_gbr_float::gbrapf32_to_rgba_f16_row::<false>(
      &green, &blue, &red, &alpha, &mut want, w,
    );
    assert_eq!(got, want, "gbrapf32_to_rgba_f16 lane-order width={w}");
  }
}
/// Checks the AVX-512 + F16C `gbrpf16_to_rgb_row_f16c::<false>` kernel on pseudo-random
/// `f16` planes. The reference is built by widening each plane to `f32` with `to_f32`
/// and running the scalar `gbrpf32_to_rgb_row`; outputs must be identical for every
/// width in `WIDTHS`. Returns without asserting unless `avx512bw` and `f16c` are reported.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 + F16C SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf16_to_rgb_f16c_matches_scalar() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw")
    && std::arch::is_x86_feature_detected!("f16c");
  if !cpu_ok {
    return;
  }
  let widen =
    |plane: &[half::f16]| -> std::vec::Vec<f32> { plane.iter().map(|v| v.to_f32()).collect() };
  for w in WIDTHS.iter().copied() {
    let zeros = || std::vec![half::f16::ZERO; w];
    let (mut green, mut blue, mut red) = (zeros(), zeros(), zeros());
    prng_f16(&mut green, 0xD001_0001);
    prng_f16(&mut blue, 0xD001_0002);
    prng_f16(&mut red, 0xD001_0003);
    let out_len = w * 3;
    let mut got = std::vec![0u8; out_len];
    let mut want = std::vec![0u8; out_len];
    // SAFETY: avx512bw and f16c were detected above; planes hold `w` samples and the
    // output `w * 3`, mirroring the scalar call (full kernel contract lives elsewhere).
    unsafe { gbrpf16_to_rgb_row_f16c::<false>(&green, &blue, &red, &mut got, w) };
    let (gf, bf, rf) = (widen(&green), widen(&blue), widen(&red));
    scalar::planar_gbr_float::gbrpf32_to_rgb_row::<false>(&gf, &bf, &rf, &mut want, w);
    assert_eq!(got, want, "gbrpf16_to_rgb (F16C widen) width={w}");
  }
}
/// Feeds `f16` ramps with distinct per-channel slopes through the AVX-512 + F16C
/// `gbrpf16_to_rgb_row_f16c::<false>` kernel and compares against the scalar
/// `gbrpf32_to_rgb_row` run on the widened planes, so a swapped or misplaced channel
/// shows up as a mismatch. Widths above 60 are skipped.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 + F16C SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf16_to_rgb_lane_order() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw")
    && std::arch::is_x86_feature_detected!("f16c");
  if !cpu_ok {
    return;
  }
  let widen =
    |plane: &[half::f16]| -> std::vec::Vec<f32> { plane.iter().map(|v| v.to_f32()).collect() };
  for w in WIDTHS.iter().copied().filter(|&w| w <= 60) {
    let zeros = || std::vec![half::f16::ZERO; w];
    let (mut green, mut blue, mut red) = (zeros(), zeros(), zeros());
    asym_ramp_f16(&mut green, &mut blue, &mut red);
    let out_len = w * 3;
    let mut got = std::vec![0u8; out_len];
    let mut want = std::vec![0u8; out_len];
    // SAFETY: avx512bw and f16c were detected above; planes hold `w` samples and the
    // output `w * 3`, mirroring the scalar call (full kernel contract lives elsewhere).
    unsafe { gbrpf16_to_rgb_row_f16c::<false>(&green, &blue, &red, &mut got, w) };
    let (gf, bf, rf) = (widen(&green), widen(&blue), widen(&red));
    scalar::planar_gbr_float::gbrpf32_to_rgb_row::<false>(&gf, &bf, &rf, &mut want, w);
    assert_eq!(got, want, "gbrpf16_to_rgb lane-order width={w}");
  }
}
/// Checks the AVX-512 + F16C `gbrpf16_to_rgba_row_f16c::<false>` kernel on pseudo-random
/// `f16` planes. The reference is built by widening each plane to `f32` with `to_f32`
/// and running the scalar `gbrpf32_to_rgba_row`; outputs must be identical for every
/// width in `WIDTHS`. Returns without asserting unless `avx512bw` and `f16c` are reported.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 + F16C SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf16_to_rgba_f16c_matches_scalar() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw")
    && std::arch::is_x86_feature_detected!("f16c");
  if !cpu_ok {
    return;
  }
  let widen =
    |plane: &[half::f16]| -> std::vec::Vec<f32> { plane.iter().map(|v| v.to_f32()).collect() };
  for w in WIDTHS.iter().copied() {
    let zeros = || std::vec![half::f16::ZERO; w];
    let (mut green, mut blue, mut red) = (zeros(), zeros(), zeros());
    prng_f16(&mut green, 0xD002_0001);
    prng_f16(&mut blue, 0xD002_0002);
    prng_f16(&mut red, 0xD002_0003);
    let out_len = w * 4;
    let mut got = std::vec![0u8; out_len];
    let mut want = std::vec![0u8; out_len];
    // SAFETY: avx512bw and f16c were detected above; planes hold `w` samples and the
    // output `w * 4`, mirroring the scalar call (full kernel contract lives elsewhere).
    unsafe { gbrpf16_to_rgba_row_f16c::<false>(&green, &blue, &red, &mut got, w) };
    let (gf, bf, rf) = (widen(&green), widen(&blue), widen(&red));
    scalar::planar_gbr_float::gbrpf32_to_rgba_row::<false>(&gf, &bf, &rf, &mut want, w);
    assert_eq!(got, want, "gbrpf16_to_rgba (F16C widen) width={w}");
  }
}
/// Feeds `f16` ramps with distinct per-channel slopes through the AVX-512 + F16C
/// `gbrpf16_to_rgba_row_f16c::<false>` kernel and compares against the scalar
/// `gbrpf32_to_rgba_row` run on the widened planes, so a swapped or misplaced channel
/// shows up as a mismatch. Widths above 60 are skipped.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 + F16C SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf16_to_rgba_lane_order() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw")
    && std::arch::is_x86_feature_detected!("f16c");
  if !cpu_ok {
    return;
  }
  let widen =
    |plane: &[half::f16]| -> std::vec::Vec<f32> { plane.iter().map(|v| v.to_f32()).collect() };
  for w in WIDTHS.iter().copied().filter(|&w| w <= 60) {
    let zeros = || std::vec![half::f16::ZERO; w];
    let (mut green, mut blue, mut red) = (zeros(), zeros(), zeros());
    asym_ramp_f16(&mut green, &mut blue, &mut red);
    let out_len = w * 4;
    let mut got = std::vec![0u8; out_len];
    let mut want = std::vec![0u8; out_len];
    // SAFETY: avx512bw and f16c were detected above; planes hold `w` samples and the
    // output `w * 4`, mirroring the scalar call (full kernel contract lives elsewhere).
    unsafe { gbrpf16_to_rgba_row_f16c::<false>(&green, &blue, &red, &mut got, w) };
    let (gf, bf, rf) = (widen(&green), widen(&blue), widen(&red));
    scalar::planar_gbr_float::gbrpf32_to_rgba_row::<false>(&gf, &bf, &rf, &mut want, w);
    assert_eq!(got, want, "gbrpf16_to_rgba lane-order width={w}");
  }
}
/// Checks the AVX-512 + F16C `gbrpf16_to_rgb_u16_row_f16c::<false>` kernel on
/// pseudo-random `f16` planes. The reference is built by widening each plane to `f32`
/// with `to_f32` and running the scalar `gbrpf32_to_rgb_u16_row`; outputs must be
/// identical for every width in `WIDTHS`. Returns without asserting unless `avx512bw`
/// and `f16c` are reported.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 + F16C SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf16_to_rgb_u16_f16c_matches_scalar() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw")
    && std::arch::is_x86_feature_detected!("f16c");
  if !cpu_ok {
    return;
  }
  let widen =
    |plane: &[half::f16]| -> std::vec::Vec<f32> { plane.iter().map(|v| v.to_f32()).collect() };
  for w in WIDTHS.iter().copied() {
    let zeros = || std::vec![half::f16::ZERO; w];
    let (mut green, mut blue, mut red) = (zeros(), zeros(), zeros());
    prng_f16(&mut green, 0xD003_0001);
    prng_f16(&mut blue, 0xD003_0002);
    prng_f16(&mut red, 0xD003_0003);
    let out_len = w * 3;
    let mut got = std::vec![0u16; out_len];
    let mut want = std::vec![0u16; out_len];
    // SAFETY: avx512bw and f16c were detected above; planes hold `w` samples and the
    // output `w * 3`, mirroring the scalar call (full kernel contract lives elsewhere).
    unsafe { gbrpf16_to_rgb_u16_row_f16c::<false>(&green, &blue, &red, &mut got, w) };
    let (gf, bf, rf) = (widen(&green), widen(&blue), widen(&red));
    scalar::planar_gbr_float::gbrpf32_to_rgb_u16_row::<false>(&gf, &bf, &rf, &mut want, w);
    assert_eq!(got, want, "gbrpf16_to_rgb_u16 (F16C widen) width={w}");
  }
}
/// Checks the AVX-512 + F16C `gbrpf16_to_rgba_u16_row_f16c::<false>` kernel on
/// pseudo-random `f16` planes. The reference is built by widening each plane to `f32`
/// with `to_f32` and running the scalar `gbrpf32_to_rgba_u16_row`; outputs must be
/// identical for every width in `WIDTHS`. Returns without asserting unless `avx512bw`
/// and `f16c` are reported.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 + F16C SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf16_to_rgba_u16_f16c_matches_scalar() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw")
    && std::arch::is_x86_feature_detected!("f16c");
  if !cpu_ok {
    return;
  }
  let widen =
    |plane: &[half::f16]| -> std::vec::Vec<f32> { plane.iter().map(|v| v.to_f32()).collect() };
  for w in WIDTHS.iter().copied() {
    let zeros = || std::vec![half::f16::ZERO; w];
    let (mut green, mut blue, mut red) = (zeros(), zeros(), zeros());
    prng_f16(&mut green, 0xD004_0001);
    prng_f16(&mut blue, 0xD004_0002);
    prng_f16(&mut red, 0xD004_0003);
    let out_len = w * 4;
    let mut got = std::vec![0u16; out_len];
    let mut want = std::vec![0u16; out_len];
    // SAFETY: avx512bw and f16c were detected above; planes hold `w` samples and the
    // output `w * 4`, mirroring the scalar call (full kernel contract lives elsewhere).
    unsafe { gbrpf16_to_rgba_u16_row_f16c::<false>(&green, &blue, &red, &mut got, w) };
    let (gf, bf, rf) = (widen(&green), widen(&blue), widen(&red));
    scalar::planar_gbr_float::gbrpf32_to_rgba_u16_row::<false>(&gf, &bf, &rf, &mut want, w);
    assert_eq!(got, want, "gbrpf16_to_rgba_u16 (F16C widen) width={w}");
  }
}
/// Checks the AVX-512 + F16C `gbrpf16_to_rgb_f32_row_f16c::<false>` kernel on
/// pseudo-random `f16` planes. The reference is built by widening each plane to `f32`
/// with `to_f32` and running the scalar `gbrpf32_to_rgb_f32_row`; outputs must be exactly
/// equal for every width in `WIDTHS`. Returns without asserting unless `avx512bw` and
/// `f16c` are reported.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 + F16C SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf16_to_rgb_f32_f16c_matches_scalar() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw")
    && std::arch::is_x86_feature_detected!("f16c");
  if !cpu_ok {
    return;
  }
  let widen =
    |plane: &[half::f16]| -> std::vec::Vec<f32> { plane.iter().map(|v| v.to_f32()).collect() };
  for w in WIDTHS.iter().copied() {
    let zeros = || std::vec![half::f16::ZERO; w];
    let (mut green, mut blue, mut red) = (zeros(), zeros(), zeros());
    prng_f16(&mut green, 0xD005_0001);
    prng_f16(&mut blue, 0xD005_0002);
    prng_f16(&mut red, 0xD005_0003);
    let out_len = w * 3;
    let mut got = std::vec![0.0f32; out_len];
    let mut want = std::vec![0.0f32; out_len];
    // SAFETY: avx512bw and f16c were detected above; planes hold `w` samples and the
    // output `w * 3`, mirroring the scalar call (full kernel contract lives elsewhere).
    unsafe { gbrpf16_to_rgb_f32_row_f16c::<false>(&green, &blue, &red, &mut got, w) };
    let (gf, bf, rf) = (widen(&green), widen(&blue), widen(&red));
    scalar::planar_gbr_float::gbrpf32_to_rgb_f32_row::<false>(&gf, &bf, &rf, &mut want, w);
    assert_eq!(got, want, "gbrpf16_to_rgb_f32 (F16C widen) width={w}");
  }
}
/// Checks the AVX-512 + F16C `gbrpf16_to_rgba_f32_row_f16c::<false>` kernel on
/// pseudo-random `f16` planes. The reference is built by widening each plane to `f32`
/// with `to_f32` and running the scalar `gbrpf32_to_rgba_f32_row`; outputs must be
/// exactly equal for every width in `WIDTHS`. Returns without asserting unless
/// `avx512bw` and `f16c` are reported.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 + F16C SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf16_to_rgba_f32_f16c_matches_scalar() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw")
    && std::arch::is_x86_feature_detected!("f16c");
  if !cpu_ok {
    return;
  }
  let widen =
    |plane: &[half::f16]| -> std::vec::Vec<f32> { plane.iter().map(|v| v.to_f32()).collect() };
  for w in WIDTHS.iter().copied() {
    let zeros = || std::vec![half::f16::ZERO; w];
    let (mut green, mut blue, mut red) = (zeros(), zeros(), zeros());
    prng_f16(&mut green, 0xD006_0001);
    prng_f16(&mut blue, 0xD006_0002);
    prng_f16(&mut red, 0xD006_0003);
    let out_len = w * 4;
    let mut got = std::vec![0.0f32; out_len];
    let mut want = std::vec![0.0f32; out_len];
    // SAFETY: avx512bw and f16c were detected above; planes hold `w` samples and the
    // output `w * 4`, mirroring the scalar call (full kernel contract lives elsewhere).
    unsafe { gbrpf16_to_rgba_f32_row_f16c::<false>(&green, &blue, &red, &mut got, w) };
    let (gf, bf, rf) = (widen(&green), widen(&blue), widen(&red));
    scalar::planar_gbr_float::gbrpf32_to_rgba_f32_row::<false>(&gf, &bf, &rf, &mut want, w);
    assert_eq!(got, want, "gbrpf16_to_rgba_f32 (F16C widen) width={w}");
  }
}
/// Checks that the AVX-512 `gbrpf16_to_rgb_f16_row::<false>` kernel and the scalar
/// `planar_gbr_f16::gbrpf16_to_rgb_f16_row` reference produce equal `f16` output on
/// pseudo-random `f16` planes, for every width in `WIDTHS`. Only `avx512bw` is required
/// by this test's guard; it returns without asserting when that is not reported.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf16_to_rgb_f16_lossless_matches_scalar() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw");
  if !cpu_ok {
    return;
  }
  for w in WIDTHS.iter().copied() {
    let zeros = || std::vec![half::f16::ZERO; w];
    let (mut green, mut blue, mut red) = (zeros(), zeros(), zeros());
    prng_f16(&mut green, 0xD007_0001);
    prng_f16(&mut blue, 0xD007_0002);
    prng_f16(&mut red, 0xD007_0003);
    let out_len = w * 3;
    let mut got = std::vec![half::f16::ZERO; out_len];
    let mut want = std::vec![half::f16::ZERO; out_len];
    // SAFETY: avx512bw was detected above; planes hold `w` samples and the output
    // `w * 3`, mirroring the scalar call (the kernel's full contract lives elsewhere).
    unsafe { gbrpf16_to_rgb_f16_row::<false>(&green, &blue, &red, &mut got, w) };
    scalar::planar_gbr_f16::gbrpf16_to_rgb_f16_row::<false>(&green, &blue, &red, &mut want, w);
    assert_eq!(got, want, "gbrpf16_to_rgb_f16 lossless width={w}");
  }
}
/// Feeds `f16` ramps with distinct per-channel slopes through the AVX-512 and scalar
/// `gbrpf16_to_rgb_f16_row::<false>` paths and requires equal output, so a swapped or
/// misplaced channel shows up as a mismatch. Widths above 60 are skipped.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf16_to_rgb_f16_lane_order() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw");
  if !cpu_ok {
    return;
  }
  for w in WIDTHS.iter().copied().filter(|&w| w <= 60) {
    let zeros = || std::vec![half::f16::ZERO; w];
    let (mut green, mut blue, mut red) = (zeros(), zeros(), zeros());
    asym_ramp_f16(&mut green, &mut blue, &mut red);
    let out_len = w * 3;
    let mut got = std::vec![half::f16::ZERO; out_len];
    let mut want = std::vec![half::f16::ZERO; out_len];
    // SAFETY: avx512bw was detected above; planes hold `w` samples and the output
    // `w * 3`, mirroring the scalar call (the kernel's full contract lives elsewhere).
    unsafe { gbrpf16_to_rgb_f16_row::<false>(&green, &blue, &red, &mut got, w) };
    scalar::planar_gbr_f16::gbrpf16_to_rgb_f16_row::<false>(&green, &blue, &red, &mut want, w);
    assert_eq!(got, want, "gbrpf16_to_rgb_f16 lane-order width={w}");
  }
}
/// Checks that the AVX-512 `gbrpf16_to_rgba_f16_row::<false>` kernel and the scalar
/// `planar_gbr_f16::gbrpf16_to_rgba_f16_row` reference produce equal `f16` output (4
/// values per pixel) on pseudo-random `f16` planes, for every width in `WIDTHS`. Only
/// `avx512bw` is required by this test's guard.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf16_to_rgba_f16_lossless_matches_scalar() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw");
  if !cpu_ok {
    return;
  }
  for w in WIDTHS.iter().copied() {
    let zeros = || std::vec![half::f16::ZERO; w];
    let (mut green, mut blue, mut red) = (zeros(), zeros(), zeros());
    prng_f16(&mut green, 0xD008_0001);
    prng_f16(&mut blue, 0xD008_0002);
    prng_f16(&mut red, 0xD008_0003);
    let out_len = w * 4;
    let mut got = std::vec![half::f16::ZERO; out_len];
    let mut want = std::vec![half::f16::ZERO; out_len];
    // SAFETY: avx512bw was detected above; planes hold `w` samples and the output
    // `w * 4`, mirroring the scalar call (the kernel's full contract lives elsewhere).
    unsafe { gbrpf16_to_rgba_f16_row::<false>(&green, &blue, &red, &mut got, w) };
    scalar::planar_gbr_f16::gbrpf16_to_rgba_f16_row::<false>(&green, &blue, &red, &mut want, w);
    assert_eq!(got, want, "gbrpf16_to_rgba_f16 lossless width={w}");
  }
}
/// Feeds `f16` ramps with distinct per-channel slopes through the AVX-512 and scalar
/// `gbrpf16_to_rgba_f16_row::<false>` paths and requires equal output, so a swapped or
/// misplaced channel shows up as a mismatch. Widths above 60 are skipped.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf16_to_rgba_f16_lane_order() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw");
  if !cpu_ok {
    return;
  }
  for w in WIDTHS.iter().copied().filter(|&w| w <= 60) {
    let zeros = || std::vec![half::f16::ZERO; w];
    let (mut green, mut blue, mut red) = (zeros(), zeros(), zeros());
    asym_ramp_f16(&mut green, &mut blue, &mut red);
    let out_len = w * 4;
    let mut got = std::vec![half::f16::ZERO; out_len];
    let mut want = std::vec![half::f16::ZERO; out_len];
    // SAFETY: avx512bw was detected above; planes hold `w` samples and the output
    // `w * 4`, mirroring the scalar call (the kernel's full contract lives elsewhere).
    unsafe { gbrpf16_to_rgba_f16_row::<false>(&green, &blue, &red, &mut got, w) };
    scalar::planar_gbr_f16::gbrpf16_to_rgba_f16_row::<false>(&green, &blue, &red, &mut want, w);
    assert_eq!(got, want, "gbrpf16_to_rgba_f16 lane-order width={w}");
  }
}
/// Checks the AVX-512 + F16C `gbrpf16_to_luma_row_f16c::<false>` kernel on pseudo-random
/// `f16` planes. The reference is the scalar `gbrpf32_to_luma_row` run on planes widened
/// with `to_f32`; both paths receive `ColorMatrix::Bt709` and `true` as the trailing
/// arguments. Outputs must be identical for every width in `WIDTHS`. Returns without
/// asserting unless `avx512bw` and `f16c` are reported.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 + F16C SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf16_to_luma_f16c_matches_scalar() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw")
    && std::arch::is_x86_feature_detected!("f16c");
  if !cpu_ok {
    return;
  }
  let widen =
    |plane: &[half::f16]| -> std::vec::Vec<f32> { plane.iter().map(|v| v.to_f32()).collect() };
  for w in WIDTHS.iter().copied() {
    let zeros = || std::vec![half::f16::ZERO; w];
    let (mut green, mut blue, mut red) = (zeros(), zeros(), zeros());
    prng_f16(&mut green, 0xD009_0001);
    prng_f16(&mut blue, 0xD009_0002);
    prng_f16(&mut red, 0xD009_0003);
    let mut got = std::vec![0u8; w];
    let mut want = std::vec![0u8; w];
    // SAFETY: avx512bw and f16c were detected above; every plane and the output hold
    // `w` elements, mirroring the scalar call (full kernel contract lives elsewhere).
    unsafe {
      gbrpf16_to_luma_row_f16c::<false>(
        &green,
        &blue,
        &red,
        &mut got,
        w,
        crate::ColorMatrix::Bt709,
        true,
      )
    };
    let (gf, bf, rf) = (widen(&green), widen(&blue), widen(&red));
    scalar::planar_gbr_float::gbrpf32_to_luma_row::<false>(
      &gf,
      &bf,
      &rf,
      &mut want,
      w,
      crate::ColorMatrix::Bt709,
      true,
    );
    assert_eq!(got, want, "gbrpf16_to_luma (F16C) width={w}");
  }
}
/// Checks the AVX-512 + F16C `gbrpf16_to_luma_u16_row_f16c::<false>` kernel on
/// pseudo-random `f16` planes. The reference is the scalar `gbrpf32_to_luma_u16_row` run
/// on planes widened with `to_f32`; both paths receive `ColorMatrix::Bt709` and `true`
/// as the trailing arguments. Outputs must be identical for every width in `WIDTHS`.
/// Returns without asserting unless `avx512bw` and `f16c` are reported.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 + F16C SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf16_to_luma_u16_f16c_matches_scalar() {
  let cpu_ok = std::arch::is_x86_feature_detected!("avx512bw")
    && std::arch::is_x86_feature_detected!("f16c");
  if !cpu_ok {
    return;
  }
  let widen =
    |plane: &[half::f16]| -> std::vec::Vec<f32> { plane.iter().map(|v| v.to_f32()).collect() };
  for w in WIDTHS.iter().copied() {
    let zeros = || std::vec![half::f16::ZERO; w];
    let (mut green, mut blue, mut red) = (zeros(), zeros(), zeros());
    prng_f16(&mut green, 0xD00A_0001);
    prng_f16(&mut blue, 0xD00A_0002);
    prng_f16(&mut red, 0xD00A_0003);
    let mut got = std::vec![0u16; w];
    let mut want = std::vec![0u16; w];
    // SAFETY: avx512bw and f16c were detected above; every plane and the output hold
    // `w` elements, mirroring the scalar call (full kernel contract lives elsewhere).
    unsafe {
      gbrpf16_to_luma_u16_row_f16c::<false>(
        &green,
        &blue,
        &red,
        &mut got,
        w,
        crate::ColorMatrix::Bt709,
        true,
      )
    };
    let (gf, bf, rf) = (widen(&green), widen(&blue), widen(&red));
    scalar::planar_gbr_float::gbrpf32_to_luma_u16_row::<false>(
      &gf,
      &bf,
      &rf,
      &mut want,
      w,
      crate::ColorMatrix::Bt709,
      true,
    );
    assert_eq!(got, want, "gbrpf16_to_luma_u16 (F16C) width={w}");
  }
}
/// Compares the three output planes of `gbrpf16_to_hsv_row_f16c` with those of
/// the scalar `gbrpf32_to_hsv_row` reference, which is fed the same samples
/// widened to `f32`, for every width in `WIDTHS`.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 + F16C SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf16_to_hsv_f16c_matches_scalar() {
    let supported = std::arch::is_x86_feature_detected!("avx512bw")
        && std::arch::is_x86_feature_detected!("f16c");
    if !supported {
        // Nothing to exercise on hosts lacking either CPU feature.
        return;
    }
    // Converts a half-float plane to `f32` for the scalar reference.
    let widen = |src: &[half::f16]| -> std::vec::Vec<f32> {
        src.iter().map(|h| h.to_f32()).collect()
    };
    for w in WIDTHS.iter().copied() {
        // Builds a pseudo-random half-float plane of the current width.
        let plane = |seed: u32| {
            let mut samples = std::vec![half::f16::ZERO; w];
            prng_f16(&mut samples, seed);
            samples
        };
        let green = plane(0xD00B_0001);
        let blue = plane(0xD00B_0002);
        let red = plane(0xD00B_0003);

        let (mut h_simd, mut s_simd, mut v_simd) =
            (std::vec![0u8; w], std::vec![0u8; w], std::vec![0u8; w]);
        let (mut h_ref, mut s_ref, mut v_ref) =
            (std::vec![0u8; w], std::vec![0u8; w], std::vec![0u8; w]);

        // SAFETY: the CPU features detected above are assumed to be the ones
        // this kernel requires (NOTE(review): confirm against its definition).
        unsafe {
            gbrpf16_to_hsv_row_f16c::<false>(
                &green,
                &blue,
                &red,
                &mut h_simd,
                &mut s_simd,
                &mut v_simd,
                w,
            )
        };
        scalar::planar_gbr_float::gbrpf32_to_hsv_row::<false>(
            &widen(&green),
            &widen(&blue),
            &widen(&red),
            &mut h_ref,
            &mut s_ref,
            &mut v_ref,
            w,
        );
        assert_eq!(h_simd, h_ref, "gbrpf16 hsv H (F16C) width={w}");
        assert_eq!(s_simd, s_ref, "gbrpf16 hsv S (F16C) width={w}");
        assert_eq!(v_simd, v_ref, "gbrpf16 hsv V (F16C) width={w}");
    }
}
/// Compares `gbrapf16_to_rgba_row_f16c` with the scalar
/// `gbrapf32_to_rgba_row` reference, which is fed the same four planes widened
/// to `f32`, for every width in `WIDTHS`.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 + F16C SIMD intrinsics unsupported by Miri")]
fn avx512_gbrapf16_to_rgba_f16c_matches_scalar() {
    let supported = std::arch::is_x86_feature_detected!("avx512bw")
        && std::arch::is_x86_feature_detected!("f16c");
    if !supported {
        // Nothing to exercise on hosts lacking either CPU feature.
        return;
    }
    // Converts a half-float plane to `f32` for the scalar reference.
    let widen = |src: &[half::f16]| -> std::vec::Vec<f32> {
        src.iter().map(|h| h.to_f32()).collect()
    };
    for w in WIDTHS.iter().copied() {
        // Builds a pseudo-random half-float plane of the current width.
        let plane = |seed: u32| {
            let mut samples = std::vec![half::f16::ZERO; w];
            prng_f16(&mut samples, seed);
            samples
        };
        let green = plane(0xD00C_0001);
        let blue = plane(0xD00C_0002);
        let red = plane(0xD00C_0003);
        let alpha = plane(0xD00C_0004);
        // Four interleaved output bytes per pixel.
        let mut simd_out = std::vec![0u8; w * 4];
        let mut scalar_out = std::vec![0u8; w * 4];
        // SAFETY: the CPU features detected above are assumed to be the ones
        // this kernel requires (NOTE(review): confirm against its definition).
        unsafe {
            gbrapf16_to_rgba_row_f16c::<false>(&green, &blue, &red, &alpha, &mut simd_out, w)
        };
        scalar::planar_gbr_float::gbrapf32_to_rgba_row::<false>(
            &widen(&green),
            &widen(&blue),
            &widen(&red),
            &widen(&alpha),
            &mut scalar_out,
            w,
        );
        assert_eq!(simd_out, scalar_out, "gbrapf16_to_rgba (F16C widen) width={w}");
    }
}
/// Runs `gbrapf16_to_rgba_row_f16c` and the scalar `gbrapf32_to_rgba_row`
/// reference on the ramp produced by `asym_ramp_f16_a` (a different slope per
/// plane) and requires identical output for every width in `WIDTHS` up to 60.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 + F16C SIMD intrinsics unsupported by Miri")]
fn avx512_gbrapf16_to_rgba_lane_order() {
    let supported = std::arch::is_x86_feature_detected!("avx512bw")
        && std::arch::is_x86_feature_detected!("f16c");
    if !supported {
        // Nothing to exercise on hosts lacking either CPU feature.
        return;
    }
    // Converts a half-float plane to `f32` for the scalar reference.
    let widen = |src: &[half::f16]| -> std::vec::Vec<f32> {
        src.iter().map(|h| h.to_f32()).collect()
    };
    // Widths above 60 are skipped; presumably so the steepest ramp
    // (`4 * (n + 1) / 255`) stays below 1.0 — NOTE(review): confirm intent.
    for w in WIDTHS.iter().copied().filter(|&w| w <= 60) {
        let zeros = || std::vec![half::f16::ZERO; w];
        let (mut green, mut blue, mut red, mut alpha) = (zeros(), zeros(), zeros(), zeros());
        asym_ramp_f16_a(&mut green, &mut blue, &mut red, &mut alpha);
        let mut simd_out = std::vec![0u8; w * 4];
        let mut scalar_out = std::vec![0u8; w * 4];
        // SAFETY: the CPU features detected above are assumed to be the ones
        // this kernel requires (NOTE(review): confirm against its definition).
        unsafe {
            gbrapf16_to_rgba_row_f16c::<false>(&green, &blue, &red, &alpha, &mut simd_out, w)
        };
        let green_f32 = widen(&green);
        let blue_f32 = widen(&blue);
        let red_f32 = widen(&red);
        let alpha_f32 = widen(&alpha);
        scalar::planar_gbr_float::gbrapf32_to_rgba_row::<false>(
            &green_f32,
            &blue_f32,
            &red_f32,
            &alpha_f32,
            &mut scalar_out,
            w,
        );
        assert_eq!(simd_out, scalar_out, "gbrapf16_to_rgba lane-order width={w}");
    }
}
/// Compares `gbrapf16_to_rgba_u16_row_f16c` with the scalar
/// `gbrapf32_to_rgba_u16_row` reference, which is fed the same four planes
/// widened to `f32`, for every width in `WIDTHS`.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 + F16C SIMD intrinsics unsupported by Miri")]
fn avx512_gbrapf16_to_rgba_u16_f16c_matches_scalar() {
    let supported = std::arch::is_x86_feature_detected!("avx512bw")
        && std::arch::is_x86_feature_detected!("f16c");
    if !supported {
        // Nothing to exercise on hosts lacking either CPU feature.
        return;
    }
    // Builds a pseudo-random half-float plane of `len` samples.
    let plane = |len: usize, seed: u32| {
        let mut samples = std::vec![half::f16::ZERO; len];
        prng_f16(&mut samples, seed);
        samples
    };
    // Converts a half-float plane to `f32` for the scalar reference.
    let widen = |src: &[half::f16]| -> std::vec::Vec<f32> {
        src.iter().map(|h| h.to_f32()).collect()
    };
    for w in WIDTHS.iter().copied() {
        let green = plane(w, 0xD00D_0001);
        let blue = plane(w, 0xD00D_0002);
        let red = plane(w, 0xD00D_0003);
        let alpha = plane(w, 0xD00D_0004);
        // Four interleaved output samples per pixel.
        let mut simd_out = std::vec![0u16; w * 4];
        let mut scalar_out = std::vec![0u16; w * 4];
        // SAFETY: the CPU features detected above are assumed to be the ones
        // this kernel requires (NOTE(review): confirm against its definition).
        unsafe {
            gbrapf16_to_rgba_u16_row_f16c::<false>(&green, &blue, &red, &alpha, &mut simd_out, w)
        };
        scalar::planar_gbr_float::gbrapf32_to_rgba_u16_row::<false>(
            &widen(&green),
            &widen(&blue),
            &widen(&red),
            &widen(&alpha),
            &mut scalar_out,
            w,
        );
        assert_eq!(simd_out, scalar_out, "gbrapf16_to_rgba_u16 (F16C widen) width={w}");
    }
}
/// Compares `gbrapf16_to_rgba_f32_row_f16c` with the scalar
/// `gbrapf32_to_rgba_f32_row` reference, which is fed the same four planes
/// widened to `f32`, for every width in `WIDTHS`.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 + F16C SIMD intrinsics unsupported by Miri")]
fn avx512_gbrapf16_to_rgba_f32_f16c_matches_scalar() {
    let supported = std::arch::is_x86_feature_detected!("avx512bw")
        && std::arch::is_x86_feature_detected!("f16c");
    if !supported {
        // Nothing to exercise on hosts lacking either CPU feature.
        return;
    }
    // Converts a half-float plane to `f32` for the scalar reference.
    let widen = |src: &[half::f16]| -> std::vec::Vec<f32> {
        src.iter().map(|h| h.to_f32()).collect()
    };
    for w in WIDTHS.iter().copied() {
        // Builds a pseudo-random half-float plane of the current width.
        let plane = |seed: u32| {
            let mut samples = std::vec![half::f16::ZERO; w];
            prng_f16(&mut samples, seed);
            samples
        };
        let green = plane(0xD00E_0001);
        let blue = plane(0xD00E_0002);
        let red = plane(0xD00E_0003);
        let alpha = plane(0xD00E_0004);
        // Four interleaved output samples per pixel.
        let mut simd_out = std::vec![0.0f32; w * 4];
        let mut scalar_out = std::vec![0.0f32; w * 4];
        // SAFETY: the CPU features detected above are assumed to be the ones
        // this kernel requires (NOTE(review): confirm against its definition).
        unsafe {
            gbrapf16_to_rgba_f32_row_f16c::<false>(&green, &blue, &red, &alpha, &mut simd_out, w)
        };
        let (green_f32, blue_f32) = (widen(&green), widen(&blue));
        let (red_f32, alpha_f32) = (widen(&red), widen(&alpha));
        scalar::planar_gbr_float::gbrapf32_to_rgba_f32_row::<false>(
            &green_f32,
            &blue_f32,
            &red_f32,
            &alpha_f32,
            &mut scalar_out,
            w,
        );
        assert_eq!(simd_out, scalar_out, "gbrapf16_to_rgba_f32 (F16C widen) width={w}");
    }
}
/// Compares the half-float-in / half-float-out `gbrapf16_to_rgba_f16_row`
/// kernel with `scalar::planar_gbr_f16::gbrapf16_to_rgba_f16_row` on
/// pseudo-random planes, for every width in `WIDTHS`.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 SIMD intrinsics unsupported by Miri")]
fn avx512_gbrapf16_to_rgba_f16_lossless_matches_scalar() {
    let has_avx512bw = std::arch::is_x86_feature_detected!("avx512bw");
    if !has_avx512bw {
        // Nothing to exercise on hosts without the CPU feature.
        return;
    }
    // Builds a pseudo-random half-float plane of `len` samples.
    let plane = |len: usize, seed: u32| {
        let mut samples = std::vec![half::f16::ZERO; len];
        prng_f16(&mut samples, seed);
        samples
    };
    for w in WIDTHS.iter().copied() {
        let green = plane(w, 0xD00F_0001);
        let blue = plane(w, 0xD00F_0002);
        let red = plane(w, 0xD00F_0003);
        let alpha = plane(w, 0xD00F_0004);
        // Four interleaved output samples per pixel.
        let mut simd_out = std::vec![half::f16::ZERO; w * 4];
        let mut scalar_out = std::vec![half::f16::ZERO; w * 4];
        // SAFETY: the CPU feature detected above is assumed to be the one this
        // kernel requires (NOTE(review): confirm against its definition).
        unsafe {
            gbrapf16_to_rgba_f16_row::<false>(&green, &blue, &red, &alpha, &mut simd_out, w)
        };
        scalar::planar_gbr_f16::gbrapf16_to_rgba_f16_row::<false>(
            &green,
            &blue,
            &red,
            &alpha,
            &mut scalar_out,
            w,
        );
        assert_eq!(simd_out, scalar_out, "gbrapf16_to_rgba_f16 lossless width={w}");
    }
}
/// Runs `gbrapf16_to_rgba_f16_row` and its scalar counterpart on the ramp
/// produced by `asym_ramp_f16_a` (a different slope per plane) and requires
/// identical output for every width in `WIDTHS` up to 60.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 SIMD intrinsics unsupported by Miri")]
fn avx512_gbrapf16_to_rgba_f16_lane_order() {
    let has_avx512bw = std::arch::is_x86_feature_detected!("avx512bw");
    if !has_avx512bw {
        // Nothing to exercise on hosts without the CPU feature.
        return;
    }
    // Widths above 60 are skipped, as in the other lane-order tests.
    for w in WIDTHS.iter().copied().filter(|&w| w <= 60) {
        let zeros = || std::vec![half::f16::ZERO; w];
        let (mut green, mut blue, mut red, mut alpha) = (zeros(), zeros(), zeros(), zeros());
        asym_ramp_f16_a(&mut green, &mut blue, &mut red, &mut alpha);
        // Four interleaved output samples per pixel.
        let mut simd_out = std::vec![half::f16::ZERO; w * 4];
        let mut scalar_out = std::vec![half::f16::ZERO; w * 4];
        // SAFETY: the CPU feature detected above is assumed to be the one this
        // kernel requires (NOTE(review): confirm against its definition).
        unsafe {
            gbrapf16_to_rgba_f16_row::<false>(&green, &blue, &red, &alpha, &mut simd_out, w)
        };
        scalar::planar_gbr_f16::gbrapf16_to_rgba_f16_row::<false>(
            &green,
            &blue,
            &red,
            &alpha,
            &mut scalar_out,
            w,
        );
        assert_eq!(simd_out, scalar_out, "gbrapf16_to_rgba_f16 lane-order width={w}");
    }
}
/// Returns a copy of `src` in which the 32-bit pattern of every element has
/// its byte order reversed.
fn be_encode_f32(src: &[f32]) -> std::vec::Vec<f32> {
    let mut swapped = std::vec::Vec::with_capacity(src.len());
    for sample in src {
        let bits = sample.to_bits();
        swapped.push(f32::from_bits(bits.swap_bytes()));
    }
    swapped
}
/// Returns a copy of `src` in which the 16-bit pattern of every element has
/// its two bytes exchanged.
fn be_encode_f16(src: &[half::f16]) -> std::vec::Vec<half::f16> {
    let mut swapped = std::vec::Vec::with_capacity(src.len());
    for sample in src {
        let bits = sample.to_bits();
        swapped.push(half::f16::from_bits(bits.swap_bytes()));
    }
    swapped
}
/// Runs `gbrpf32_to_rgb_row::<false>` on pseudo-random planes and
/// `gbrpf32_to_rgb_row::<true>` on byte-swapped copies of them, and requires
/// both outputs to be identical for every width in `WIDTHS`.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf32_to_rgb_be_parity() {
    let has_avx512bw = std::arch::is_x86_feature_detected!("avx512bw");
    if !has_avx512bw {
        // Nothing to exercise on hosts without the CPU feature.
        return;
    }
    for w in WIDTHS.iter().copied() {
        // Builds a pseudo-random f32 plane of the current width.
        let plane = |seed: u32| {
            let mut samples = std::vec![0.0f32; w];
            prng_f32(&mut samples, seed);
            samples
        };
        let green = plane(0xBE01_0001);
        let blue = plane(0xBE01_0002);
        let red = plane(0xBE01_0003);
        // Three interleaved output bytes per pixel.
        let mut native_out = std::vec![0u8; w * 3];
        let mut swapped_out = std::vec![0u8; w * 3];
        // SAFETY: the CPU feature detected above is assumed to be the one this
        // kernel requires (NOTE(review): confirm against its definition).
        unsafe {
            gbrpf32_to_rgb_row::<false>(&green, &blue, &red, &mut native_out, w);
        }
        let green_swapped = be_encode_f32(&green);
        let blue_swapped = be_encode_f32(&blue);
        let red_swapped = be_encode_f32(&red);
        // SAFETY: same justification as the call above.
        unsafe {
            gbrpf32_to_rgb_row::<true>(
                &green_swapped,
                &blue_swapped,
                &red_swapped,
                &mut swapped_out,
                w,
            );
        }
        assert_eq!(native_out, swapped_out, "gbrpf32_to_rgb BE parity width={w}");
    }
}
/// Runs `gbrpf32_to_rgba_row::<false>` on pseudo-random planes and
/// `gbrpf32_to_rgba_row::<true>` on byte-swapped copies of them, and requires
/// both outputs to be identical for every width in `WIDTHS`.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf32_to_rgba_be_parity() {
    let has_avx512bw = std::arch::is_x86_feature_detected!("avx512bw");
    if !has_avx512bw {
        // Nothing to exercise on hosts without the CPU feature.
        return;
    }
    // Builds a pseudo-random f32 plane of `len` samples.
    let plane = |len: usize, seed: u32| {
        let mut samples = std::vec![0.0f32; len];
        prng_f32(&mut samples, seed);
        samples
    };
    for w in WIDTHS.iter().copied() {
        let green = plane(w, 0xBE02_0001);
        let blue = plane(w, 0xBE02_0002);
        let red = plane(w, 0xBE02_0003);
        // Four interleaved output bytes per pixel.
        let mut native_out = std::vec![0u8; w * 4];
        let mut swapped_out = std::vec![0u8; w * 4];
        // SAFETY: the CPU feature detected above is assumed to be the one this
        // kernel requires (NOTE(review): confirm against its definition).
        unsafe {
            gbrpf32_to_rgba_row::<false>(&green, &blue, &red, &mut native_out, w);
        }
        let (green_swapped, blue_swapped, red_swapped) =
            (be_encode_f32(&green), be_encode_f32(&blue), be_encode_f32(&red));
        // SAFETY: same justification as the call above.
        unsafe {
            gbrpf32_to_rgba_row::<true>(
                &green_swapped,
                &blue_swapped,
                &red_swapped,
                &mut swapped_out,
                w,
            );
        }
        assert_eq!(native_out, swapped_out, "gbrpf32_to_rgba BE parity width={w}");
    }
}
/// Runs `gbrpf16_to_rgb_f16_row::<false>` on pseudo-random half-float planes
/// and `gbrpf16_to_rgb_f16_row::<true>` on byte-swapped copies of them, and
/// requires both outputs to be identical for every width in `WIDTHS`.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 SIMD intrinsics unsupported by Miri")]
fn avx512_gbrpf16_to_rgb_f16_be_parity() {
    let has_avx512bw = std::arch::is_x86_feature_detected!("avx512bw");
    if !has_avx512bw {
        // Nothing to exercise on hosts without the CPU feature.
        return;
    }
    for w in WIDTHS.iter().copied() {
        // Builds a pseudo-random half-float plane of the current width.
        let plane = |seed: u32| {
            let mut samples = std::vec![half::f16::ZERO; w];
            prng_f16(&mut samples, seed);
            samples
        };
        let green = plane(0xBE07_0001);
        let blue = plane(0xBE07_0002);
        let red = plane(0xBE07_0003);
        // Three interleaved output samples per pixel.
        let mut native_out = std::vec![half::f16::ZERO; w * 3];
        let mut swapped_out = std::vec![half::f16::ZERO; w * 3];
        // SAFETY: the CPU feature detected above is assumed to be the one this
        // kernel requires (NOTE(review): confirm against its definition).
        unsafe {
            gbrpf16_to_rgb_f16_row::<false>(&green, &blue, &red, &mut native_out, w);
        }
        let green_swapped = be_encode_f16(&green);
        let blue_swapped = be_encode_f16(&blue);
        let red_swapped = be_encode_f16(&red);
        // SAFETY: same justification as the call above.
        unsafe {
            gbrpf16_to_rgb_f16_row::<true>(
                &green_swapped,
                &blue_swapped,
                &red_swapped,
                &mut swapped_out,
                w,
            );
        }
        assert_eq!(native_out, swapped_out, "gbrpf16_to_rgb_f16 BE parity width={w}");
    }
}
/// Runs `gbrapf16_to_rgba_f16_row::<false>` on pseudo-random half-float planes
/// and `gbrapf16_to_rgba_f16_row::<true>` on byte-swapped copies of them, and
/// requires both outputs to be identical for every width in `WIDTHS`.
#[test]
#[cfg_attr(miri, ignore = "AVX-512 SIMD intrinsics unsupported by Miri")]
fn avx512_gbrapf16_to_rgba_f16_be_parity() {
    let has_avx512bw = std::arch::is_x86_feature_detected!("avx512bw");
    if !has_avx512bw {
        // Nothing to exercise on hosts without the CPU feature.
        return;
    }
    // Builds a pseudo-random half-float plane of `len` samples.
    let plane = |len: usize, seed: u32| {
        let mut samples = std::vec![half::f16::ZERO; len];
        prng_f16(&mut samples, seed);
        samples
    };
    for w in WIDTHS.iter().copied() {
        let green = plane(w, 0xBE0F_0001);
        let blue = plane(w, 0xBE0F_0002);
        let red = plane(w, 0xBE0F_0003);
        let alpha = plane(w, 0xBE0F_0004);
        // Four interleaved output samples per pixel.
        let mut native_out = std::vec![half::f16::ZERO; w * 4];
        let mut swapped_out = std::vec![half::f16::ZERO; w * 4];
        // SAFETY: the CPU feature detected above is assumed to be the one this
        // kernel requires (NOTE(review): confirm against its definition).
        unsafe {
            gbrapf16_to_rgba_f16_row::<false>(&green, &blue, &red, &alpha, &mut native_out, w);
        }
        let green_swapped = be_encode_f16(&green);
        let blue_swapped = be_encode_f16(&blue);
        let red_swapped = be_encode_f16(&red);
        let alpha_swapped = be_encode_f16(&alpha);
        // SAFETY: same justification as the call above.
        unsafe {
            gbrapf16_to_rgba_f16_row::<true>(
                &green_swapped,
                &blue_swapped,
                &red_swapped,
                &alpha_swapped,
                &mut swapped_out,
                w,
            );
        }
        assert_eq!(native_out, swapped_out, "gbrapf16_to_rgba_f16 BE parity width={w}");
    }
}