use super::super::*;
use crate::{ColorMatrix, row::scalar};
fn pack_v410(u: u32, y: u32, v: u32) -> u32 {
debug_assert!(u < 1024 && y < 1024 && v < 1024);
(v << 20) | (y << 10) | u
}
fn pseudo_random_v410_words(n: usize, seed: usize) -> std::vec::Vec<u32> {
(0..n)
.map(|i| {
let u = ((i * 37 + seed) & 0x3FF) as u32;
let y = ((i * 53 + seed * 3) & 0x3FF) as u32;
let v = ((i * 71 + seed * 7) & 0x3FF) as u32;
pack_v410(u, y, v)
})
.collect()
}
fn check_rgb(width: usize, matrix: ColorMatrix, full_range: bool) {
let p = pseudo_random_v410_words(width, 0xAA55);
let mut s = std::vec![0u8; width * 3];
let mut k = std::vec![0u8; width * 3];
scalar::v410_to_rgb_or_rgba_row::<false, false>(&p, &mut s, width, matrix, full_range);
unsafe {
v410_to_rgb_or_rgba_row::<false, false>(&p, &mut k, width, matrix, full_range);
}
assert_eq!(
s, k,
"AVX-512 v410→RGB diverges (width={width}, matrix={matrix:?}, full_range={full_range})"
);
}
fn check_rgba(width: usize, matrix: ColorMatrix, full_range: bool) {
let p = pseudo_random_v410_words(width, 0xAA55);
let mut s = std::vec![0u8; width * 4];
let mut k = std::vec![0u8; width * 4];
scalar::v410_to_rgb_or_rgba_row::<true, false>(&p, &mut s, width, matrix, full_range);
unsafe {
v410_to_rgb_or_rgba_row::<true, false>(&p, &mut k, width, matrix, full_range);
}
assert_eq!(
s, k,
"AVX-512 v410→RGBA diverges (width={width}, matrix={matrix:?}, full_range={full_range})"
);
}
fn check_rgb_u16(width: usize, matrix: ColorMatrix, full_range: bool) {
let p = pseudo_random_v410_words(width, 0xAA55);
let mut s = std::vec![0u16; width * 3];
let mut k = std::vec![0u16; width * 3];
scalar::v410_to_rgb_u16_or_rgba_u16_row::<false, false>(&p, &mut s, width, matrix, full_range);
unsafe {
v410_to_rgb_u16_or_rgba_u16_row::<false, false>(&p, &mut k, width, matrix, full_range);
}
assert_eq!(
s, k,
"AVX-512 v410→RGB u16 diverges (width={width}, matrix={matrix:?}, full_range={full_range})"
);
}
fn check_rgba_u16(width: usize, matrix: ColorMatrix, full_range: bool) {
let p = pseudo_random_v410_words(width, 0xAA55);
let mut s = std::vec![0u16; width * 4];
let mut k = std::vec![0u16; width * 4];
scalar::v410_to_rgb_u16_or_rgba_u16_row::<true, false>(&p, &mut s, width, matrix, full_range);
unsafe {
v410_to_rgb_u16_or_rgba_u16_row::<true, false>(&p, &mut k, width, matrix, full_range);
}
assert_eq!(
s, k,
"AVX-512 v410→RGBA u16 diverges (width={width}, matrix={matrix:?}, full_range={full_range})"
);
}
fn check_luma(width: usize) {
let p = pseudo_random_v410_words(width, 0xC001);
let mut s = std::vec![0u8; width];
let mut k = std::vec![0u8; width];
scalar::v410_to_luma_row::<false>(&p, &mut s, width);
unsafe {
v410_to_luma_row::<false>(&p, &mut k, width);
}
assert_eq!(s, k, "AVX-512 v410→luma diverges (width={width})");
}
fn check_luma_u16(width: usize) {
let p = pseudo_random_v410_words(width, 0xC001);
let mut s = std::vec![0u16; width];
let mut k = std::vec![0u16; width];
scalar::v410_to_luma_u16_row::<false>(&p, &mut s, width);
unsafe {
v410_to_luma_u16_row::<false>(&p, &mut k, width);
}
assert_eq!(s, k, "AVX-512 v410→luma u16 diverges (width={width})");
}
#[test]
#[cfg_attr(
miri,
ignore = "SIMD-dispatched row kernels use intrinsics unsupported by Miri"
)]
fn avx512_v410_rgb_matches_scalar_all_matrices() {
if !std::arch::is_x86_feature_detected!("avx512f")
|| !std::arch::is_x86_feature_detected!("avx512bw")
{
return;
}
for m in [
ColorMatrix::Bt601,
ColorMatrix::Bt709,
ColorMatrix::Bt2020Ncl,
ColorMatrix::Smpte240m,
ColorMatrix::Fcc,
ColorMatrix::YCgCo,
] {
for full in [true, false] {
check_rgb(16, m, full);
check_rgba(16, m, full);
check_rgb_u16(16, m, full);
check_rgba_u16(16, m, full);
}
}
}
#[test]
#[cfg_attr(
miri,
ignore = "SIMD-dispatched row kernels use intrinsics unsupported by Miri"
)]
fn avx512_v410_matches_scalar_widths() {
if !std::arch::is_x86_feature_detected!("avx512f")
|| !std::arch::is_x86_feature_detected!("avx512bw")
{
return;
}
for w in [
1usize, 4, 8, 15, 16, 17, 31, 32, 48, 64, 1280, 1920, 1921, 1937,
] {
check_rgb(w, ColorMatrix::Bt709, false);
check_rgba(w, ColorMatrix::Bt709, true);
check_rgb_u16(w, ColorMatrix::Bt2020Ncl, true);
check_rgba_u16(w, ColorMatrix::Bt601, false);
}
}
#[test]
#[cfg_attr(
miri,
ignore = "SIMD-dispatched row kernels use intrinsics unsupported by Miri"
)]
fn avx512_v410_luma_matches_scalar_widths() {
if !std::arch::is_x86_feature_detected!("avx512f")
|| !std::arch::is_x86_feature_detected!("avx512bw")
{
return;
}
for w in [
1usize, 4, 8, 15, 16, 17, 31, 32, 48, 64, 1280, 1920, 1921, 1937,
] {
check_luma(w);
check_luma_u16(w);
}
}
fn build_v410_packed_y_n_plus_1_u_2n_plus_1_v_neutral(width: usize) -> std::vec::Vec<u32> {
(0..width)
.map(|n| {
let y = (n as u32) + 1;
let u = 2 * (n as u32) + 1;
let v = 512u32;
(v << 20) | (y << 10) | u
})
.collect()
}
#[test]
#[cfg_attr(
miri,
ignore = "SIMD-dispatched row kernels use intrinsics unsupported by Miri"
)]
fn avx512_v410_lane_order_per_pixel_y_and_u() {
if !std::arch::is_x86_feature_detected!("avx512f")
|| !std::arch::is_x86_feature_detected!("avx512bw")
{
return;
}
const W: usize = 32;
let packed = build_v410_packed_y_n_plus_1_u_2n_plus_1_v_neutral(W);
let mut luma = std::vec![0u16; W];
unsafe {
v410_to_luma_u16_row::<false>(&packed, &mut luma, W);
}
let expected_luma: std::vec::Vec<u16> = (1..=W as u16).collect();
assert_eq!(luma, expected_luma, "avx512 v410 luma reorder bug");
let mut simd_rgb = std::vec![0u8; W * 3];
let mut scalar_rgb = std::vec![0u8; W * 3];
unsafe {
v410_to_rgb_or_rgba_row::<false, false>(
&packed,
&mut simd_rgb,
W,
crate::ColorMatrix::Bt709,
false,
);
}
scalar::v410_to_rgb_or_rgba_row::<false, false>(
&packed,
&mut scalar_rgb,
W,
crate::ColorMatrix::Bt709,
false,
);
assert_eq!(
simd_rgb, scalar_rgb,
"avx512 v410 SIMD vs scalar diverges — lane-order bug"
);
}
#[test]
#[cfg_attr(
miri,
ignore = "SIMD-dispatched row kernels use intrinsics unsupported by Miri"
)]
fn avx512_v410_be_le_simd_parity() {
if !std::arch::is_x86_feature_detected!("avx512f")
|| !std::arch::is_x86_feature_detected!("avx512bw")
{
return;
}
for w in [15usize, 16, 33, 65] {
let intended = pseudo_random_v410_words(w, 0xBEEF);
let le_bytes: std::vec::Vec<u8> = intended.iter().flat_map(|v| v.to_le_bytes()).collect();
let be_bytes: std::vec::Vec<u8> = intended.iter().flat_map(|v| v.to_be_bytes()).collect();
let le: std::vec::Vec<u32> = le_bytes
.chunks_exact(4)
.map(|b| u32::from_ne_bytes([b[0], b[1], b[2], b[3]]))
.collect();
let be: std::vec::Vec<u32> = be_bytes
.chunks_exact(4)
.map(|b| u32::from_ne_bytes([b[0], b[1], b[2], b[3]]))
.collect();
for (alpha, bpp) in [(false, 3usize), (true, 4)] {
let mut out_le = std::vec![0u8; w * bpp];
let mut out_be = std::vec![0u8; w * bpp];
unsafe {
if alpha {
v410_to_rgb_or_rgba_row::<true, false>(&le, &mut out_le, w, ColorMatrix::Bt709, false);
v410_to_rgb_or_rgba_row::<true, true>(&be, &mut out_be, w, ColorMatrix::Bt709, false);
} else {
v410_to_rgb_or_rgba_row::<false, false>(&le, &mut out_le, w, ColorMatrix::Bt709, false);
v410_to_rgb_or_rgba_row::<false, true>(&be, &mut out_be, w, ColorMatrix::Bt709, false);
}
}
assert_eq!(
out_le, out_be,
"avx512 v410 BE-vs-LE SIMD parity failed (alpha={alpha}, w={w})"
);
}
for (alpha, bpp) in [(false, 3usize), (true, 4)] {
let mut out_le = std::vec![0u16; w * bpp];
let mut out_be = std::vec![0u16; w * bpp];
unsafe {
if alpha {
v410_to_rgb_u16_or_rgba_u16_row::<true, false>(
&le,
&mut out_le,
w,
ColorMatrix::Bt709,
true,
);
v410_to_rgb_u16_or_rgba_u16_row::<true, true>(
&be,
&mut out_be,
w,
ColorMatrix::Bt709,
true,
);
} else {
v410_to_rgb_u16_or_rgba_u16_row::<false, false>(
&le,
&mut out_le,
w,
ColorMatrix::Bt709,
true,
);
v410_to_rgb_u16_or_rgba_u16_row::<false, true>(
&be,
&mut out_be,
w,
ColorMatrix::Bt709,
true,
);
}
}
assert_eq!(
out_le, out_be,
"avx512 v410 BE-vs-LE SIMD parity failed (u16, alpha={alpha}, w={w})"
);
}
{
let mut out_le = std::vec![0u8; w];
let mut out_be = std::vec![0u8; w];
unsafe {
v410_to_luma_row::<false>(&le, &mut out_le, w);
v410_to_luma_row::<true>(&be, &mut out_be, w);
}
assert_eq!(
out_le, out_be,
"avx512 v410 BE-vs-LE SIMD parity failed (luma u8, w={w})"
);
}
{
let mut out_le = std::vec![0u16; w];
let mut out_be = std::vec![0u16; w];
unsafe {
v410_to_luma_u16_row::<false>(&le, &mut out_le, w);
v410_to_luma_u16_row::<true>(&be, &mut out_be, w);
}
assert_eq!(
out_le, out_be,
"avx512 v410 BE-vs-LE SIMD parity failed (luma u16, w={w})"
);
}
}
}