use super::super::*;
use crate::{ColorMatrix, row::scalar};
/// Builds a deterministic pseudo-random row of `width * 4` `u16` samples
/// (four samples per pixel).
///
/// Sample `i` is the low 16 bits of `i * seed + seed * 3`, where the
/// multiplications and the addition wrap on `usize` overflow. The same
/// `(width, seed)` pair always yields the same row, so failures reproduce.
fn pseudo_random_ayuv64(width: usize, seed: usize) -> std::vec::Vec<u16> {
    let sample_count = width * 4;
    // The additive term does not depend on the sample index.
    let offset = seed.wrapping_mul(3);
    let mut samples = std::vec::Vec::with_capacity(sample_count);
    for idx in 0..sample_count {
        let mixed = idx.wrapping_mul(seed).wrapping_add(offset);
        samples.push((mixed & 0xFFFF) as u16);
    }
    samples
}
/// Converts one pseudo-random AYUV64 row to 8-bit RGB (`ALPHA = false`) or
/// RGBA (`ALPHA = true`) with both the scalar reference and the SIMD kernel
/// under test, then asserts that the two outputs are identical.
///
/// `ALPHA_SRC`, `matrix` and `full_range` are forwarded unchanged to both
/// implementations; the last const generic argument is fixed to `false`.
fn check_rgb<const ALPHA: bool, const ALPHA_SRC: bool>(
    width: usize,
    matrix: ColorMatrix,
    full_range: bool,
) {
    // Output bytes per pixel and the label used in the failure message.
    let (channels, target) = if ALPHA { (4, "RGBA") } else { (3, "RGB") };
    let src = pseudo_random_ayuv64(width, 0xAA55);

    // Reference result from the scalar implementation.
    let mut expected = std::vec![0u8; width * channels];
    scalar::ayuv64_to_rgb_or_rgba_row::<ALPHA, ALPHA_SRC, false>(
        &src,
        &mut expected,
        width,
        matrix,
        full_range,
    );

    // Result from the kernel under test.
    let mut actual = std::vec![0u8; width * channels];
    // SAFETY: the tests in this file return early unless AVX2 is detected
    // before calling this helper (presumably the kernel's requirement —
    // confirm against its definition). `src` holds `width * 4` samples and
    // `actual` holds `width * channels` bytes.
    unsafe {
        ayuv64_to_rgb_or_rgba_row::<ALPHA, ALPHA_SRC, false>(
            &src,
            &mut actual,
            width,
            matrix,
            full_range,
        );
    }

    assert_eq!(
        expected,
        actual,
        "AVX2 ayuv64<ALPHA={ALPHA}, ALPHA_SRC={ALPHA_SRC}>→{} diverges (width={width}, matrix={matrix:?}, full_range={full_range})",
        target
    );
}
/// 16-bit counterpart of the RGB/RGBA parity check: converts one
/// pseudo-random AYUV64 row to `u16` RGB (`ALPHA = false`) or RGBA
/// (`ALPHA = true`) with the scalar reference and with the SIMD kernel under
/// test, then asserts that both outputs are identical.
///
/// `ALPHA_SRC`, `matrix` and `full_range` are forwarded unchanged to both
/// implementations; the last const generic argument is fixed to `false`.
fn check_rgb_u16<const ALPHA: bool, const ALPHA_SRC: bool>(
    width: usize,
    matrix: ColorMatrix,
    full_range: bool,
) {
    // Output samples per pixel and the label used in the failure message.
    let (channels, target) = if ALPHA { (4, "RGBA") } else { (3, "RGB") };
    let src = pseudo_random_ayuv64(width, 0xAA55);

    // Reference result from the scalar implementation.
    let mut expected = std::vec![0u16; width * channels];
    scalar::ayuv64_to_rgb_u16_or_rgba_u16_row::<ALPHA, ALPHA_SRC, false>(
        &src,
        &mut expected,
        width,
        matrix,
        full_range,
    );

    // Result from the kernel under test.
    let mut actual = std::vec![0u16; width * channels];
    // SAFETY: the tests in this file return early unless AVX2 is detected
    // before calling this helper (presumably the kernel's requirement —
    // confirm against its definition). `src` holds `width * 4` samples and
    // `actual` holds `width * channels` samples.
    unsafe {
        ayuv64_to_rgb_u16_or_rgba_u16_row::<ALPHA, ALPHA_SRC, false>(
            &src,
            &mut actual,
            width,
            matrix,
            full_range,
        );
    }

    assert_eq!(
        expected,
        actual,
        "AVX2 ayuv64<ALPHA={ALPHA}, ALPHA_SRC={ALPHA_SRC}>→{} u16 diverges (width={width}, matrix={matrix:?}, full_range={full_range})",
        target
    );
}
/// Extracts 8-bit luma from one pseudo-random AYUV64 row with the scalar
/// reference and with the SIMD kernel under test, and asserts that the two
/// `width`-long outputs are identical.
fn check_luma(width: usize) {
    let src = pseudo_random_ayuv64(width, 0xC001);

    // Reference result from the scalar implementation.
    let mut expected = std::vec![0u8; width];
    scalar::ayuv64_to_luma_row::<false>(&src, &mut expected, width);

    // Result from the kernel under test.
    let mut actual = std::vec![0u8; width];
    // SAFETY: the tests in this file return early unless AVX2 is detected
    // before calling this helper (presumably the kernel's requirement —
    // confirm against its definition). `src` holds `width * 4` samples and
    // `actual` holds `width` bytes.
    unsafe {
        ayuv64_to_luma_row::<false>(&src, &mut actual, width);
    }

    assert_eq!(
        expected, actual,
        "AVX2 ayuv64→luma diverges (width={width})"
    );
}
/// Extracts 16-bit luma from one pseudo-random AYUV64 row with the scalar
/// reference and with the SIMD kernel under test, and asserts that the two
/// `width`-long outputs are identical.
fn check_luma_u16(width: usize) {
    let src = pseudo_random_ayuv64(width, 0xC001);

    // Reference result from the scalar implementation.
    let mut expected = std::vec![0u16; width];
    scalar::ayuv64_to_luma_u16_row::<false>(&src, &mut expected, width);

    // Result from the kernel under test.
    let mut actual = std::vec![0u16; width];
    // SAFETY: the tests in this file return early unless AVX2 is detected
    // before calling this helper (presumably the kernel's requirement —
    // confirm against its definition). `src` holds `width * 4` samples and
    // `actual` holds `width` samples.
    unsafe {
        ayuv64_to_luma_u16_row::<false>(&src, &mut actual, width);
    }

    assert_eq!(
        expected, actual,
        "AVX2 ayuv64→luma u16 diverges (width={width})"
    );
}
/// Runs the 8-bit and 16-bit RGB / RGBA parity checks at width 32 for every
/// listed colour matrix, in both full-range and limited-range mode.
#[test]
#[cfg_attr(
    miri,
    ignore = "SIMD-dispatched row kernels use intrinsics unsupported by Miri"
)]
fn avx2_ayuv64_rgb_matches_scalar_all_matrices() {
    // Nothing to verify on hosts without AVX2.
    if !std::arch::is_x86_feature_detected!("avx2") {
        return;
    }

    let matrices = [
        ColorMatrix::Bt601,
        ColorMatrix::Bt709,
        ColorMatrix::Bt2020Ncl,
        ColorMatrix::Smpte240m,
        ColorMatrix::Fcc,
        ColorMatrix::YCgCo,
    ];
    let ranges = [true, false];

    for &matrix in matrices.iter() {
        for &full_range in ranges.iter() {
            // 8-bit output: RGB, then RGBA with ALPHA_SRC enabled.
            check_rgb::<false, false>(32, matrix, full_range);
            check_rgb::<true, true>(32, matrix, full_range);
            // 16-bit output: RGB, then RGBA with ALPHA_SRC enabled.
            check_rgb_u16::<false, false>(32, matrix, full_range);
            check_rgb_u16::<true, true>(32, matrix, full_range);
        }
    }
}
/// Runs every parity helper over a spread of row widths: tiny rows, values
/// just below / at / just above 16, 32 and 48, and three widths around 1920
/// (presumably chosen to exercise the boundary between the vector body and
/// the tail handling — confirm against the kernel).
#[test]
#[cfg_attr(
    miri,
    ignore = "SIMD-dispatched row kernels use intrinsics unsupported by Miri"
)]
fn avx2_ayuv64_matches_scalar_widths() {
    // Nothing to verify on hosts without AVX2.
    if !std::arch::is_x86_feature_detected!("avx2") {
        return;
    }

    let widths: [usize; 15] = [
        1, 2, 3, 15, 16, 17, 31, 32, 33, 47, 48, 49, 1920, 1921, 1923,
    ];

    for &width in widths.iter() {
        // Each output format is paired with a different matrix / range combo.
        check_rgb::<false, false>(width, ColorMatrix::Bt709, false);
        check_rgb::<true, true>(width, ColorMatrix::Bt709, true);
        check_rgb_u16::<false, false>(width, ColorMatrix::Bt2020Ncl, true);
        check_rgb_u16::<true, true>(width, ColorMatrix::Bt601, false);
        check_luma(width);
        check_luma_u16(width);
    }
}
/// Verifies that the kernels keep per-pixel ordering by feeding a row in
/// which every pixel carries unique values in its first two slots.
///
/// Pixel `n` is packed as `[2n + 1, n + 1, 32768, 32768]`. The luma output
/// must then read `n + 1` at index `n`, and the alpha channel of the RGBA
/// output must read `2n + 1` at pixel `n`.
#[test]
#[cfg_attr(
    miri,
    ignore = "SIMD-dispatched row kernels use intrinsics unsupported by Miri"
)]
fn avx2_ayuv64_lane_order_per_pixel_y_and_a() {
    // Nothing to verify on hosts without AVX2.
    if !std::arch::is_x86_feature_detected!("avx2") {
        return;
    }

    const W: usize = 32;

    // Four samples per pixel; the first two identify the pixel, the last two
    // are held constant at 32768.
    let packed: std::vec::Vec<u16> = (0..W)
        .flat_map(|n| [(2 * n + 1) as u16, (n + 1) as u16, 32768u16, 32768u16])
        .collect();

    // --- luma: output index n must carry n + 1 ---
    let mut luma_out = std::vec![0u16; W];
    // SAFETY: AVX2 was detected above; `packed` holds `W * 4` samples and
    // `luma_out` holds `W` samples.
    unsafe {
        ayuv64_to_luma_u16_row::<false>(&packed, &mut luma_out, W);
    }
    let expected_luma: std::vec::Vec<u16> = (0..W).map(|n| (n + 1) as u16).collect();
    assert_eq!(
        luma_out, expected_luma,
        "luma_u16: Y lane order incorrect — expected Y[n]=n+1, got {luma_out:?}"
    );

    // --- alpha: the fourth channel of output pixel n must carry 2n + 1 ---
    let mut rgba_out = std::vec![0u16; W * 4];
    // SAFETY: AVX2 was detected above; `packed` holds `W * 4` samples and
    // `rgba_out` holds `W * 4` samples.
    unsafe {
        ayuv64_to_rgb_u16_or_rgba_u16_row::<true, true, false>(
            &packed,
            &mut rgba_out,
            W,
            ColorMatrix::Bt709,
            true,
        );
    }
    let alpha_out: std::vec::Vec<u16> = rgba_out.chunks_exact(4).map(|px| px[3]).collect();
    let expected_alpha: std::vec::Vec<u16> = (0..W).map(|n| (2 * n + 1) as u16).collect();
    assert_eq!(
        alpha_out, expected_alpha,
        "rgba_u16: A lane order incorrect — expected A[n]=2n+1, got {alpha_out:?}"
    );
}
/// Checks that each kernel yields the same output for a row stored in
/// little-endian byte order (last const generic argument `false`) and for
/// the same logical row stored in big-endian byte order (last const generic
/// argument `true`).
///
/// Covered per width: 8-bit RGB, 8-bit RGBA, 16-bit RGB, 16-bit RGBA,
/// 8-bit luma and 16-bit luma.
#[test]
#[cfg_attr(
    miri,
    ignore = "SIMD-dispatched row kernels use intrinsics unsupported by Miri"
)]
fn avx2_ayuv64_be_le_simd_parity() {
    // Nothing to verify on hosts without AVX2.
    if !std::arch::is_x86_feature_detected!("avx2") {
        return;
    }

    let widths: [usize; 4] = [7, 8, 17, 33];
    for &w in widths.iter() {
        let intended = pseudo_random_ayuv64(w, 0xBEEF);

        // Re-read each sample's LE / BE byte pair as a native-endian `u16`,
        // so the in-memory bytes of `le` / `be` have the named byte order.
        let le: std::vec::Vec<u16> = intended
            .iter()
            .map(|v| u16::from_ne_bytes(v.to_le_bytes()))
            .collect();
        let be: std::vec::Vec<u16> = intended
            .iter()
            .map(|v| u16::from_ne_bytes(v.to_be_bytes()))
            .collect();

        // --- 8-bit RGB ---
        let mut rgb_from_le = std::vec![0u8; w * 3];
        let mut rgb_from_be = std::vec![0u8; w * 3];
        // SAFETY: AVX2 was detected above; inputs hold `w * 4` samples and
        // outputs hold `w * 3` bytes.
        unsafe {
            ayuv64_to_rgb_or_rgba_row::<false, false, false>(
                &le,
                &mut rgb_from_le,
                w,
                ColorMatrix::Bt709,
                false,
            );
            ayuv64_to_rgb_or_rgba_row::<false, false, true>(
                &be,
                &mut rgb_from_be,
                w,
                ColorMatrix::Bt709,
                false,
            );
        }
        assert_eq!(
            rgb_from_le, rgb_from_be,
            "avx2 ayuv64 BE-vs-LE SIMD parity failed (rgb, w={w})"
        );

        // --- 8-bit RGBA with ALPHA_SRC enabled ---
        let mut rgba_from_le = std::vec![0u8; w * 4];
        let mut rgba_from_be = std::vec![0u8; w * 4];
        // SAFETY: AVX2 was detected above; inputs hold `w * 4` samples and
        // outputs hold `w * 4` bytes.
        unsafe {
            ayuv64_to_rgb_or_rgba_row::<true, true, false>(
                &le,
                &mut rgba_from_le,
                w,
                ColorMatrix::Bt709,
                false,
            );
            ayuv64_to_rgb_or_rgba_row::<true, true, true>(
                &be,
                &mut rgba_from_be,
                w,
                ColorMatrix::Bt709,
                false,
            );
        }
        assert_eq!(
            rgba_from_le, rgba_from_be,
            "avx2 ayuv64 BE-vs-LE SIMD parity failed (rgba+srcα, w={w})"
        );

        // --- 16-bit RGB ---
        let mut rgb16_from_le = std::vec![0u16; w * 3];
        let mut rgb16_from_be = std::vec![0u16; w * 3];
        // SAFETY: AVX2 was detected above; inputs hold `w * 4` samples and
        // outputs hold `w * 3` samples.
        unsafe {
            ayuv64_to_rgb_u16_or_rgba_u16_row::<false, false, false>(
                &le,
                &mut rgb16_from_le,
                w,
                ColorMatrix::Bt709,
                true,
            );
            ayuv64_to_rgb_u16_or_rgba_u16_row::<false, false, true>(
                &be,
                &mut rgb16_from_be,
                w,
                ColorMatrix::Bt709,
                true,
            );
        }
        assert_eq!(
            rgb16_from_le, rgb16_from_be,
            "avx2 ayuv64 BE-vs-LE SIMD parity failed (rgb u16, w={w})"
        );

        // --- 16-bit RGBA with ALPHA_SRC enabled ---
        let mut rgba16_from_le = std::vec![0u16; w * 4];
        let mut rgba16_from_be = std::vec![0u16; w * 4];
        // SAFETY: AVX2 was detected above; inputs hold `w * 4` samples and
        // outputs hold `w * 4` samples.
        unsafe {
            ayuv64_to_rgb_u16_or_rgba_u16_row::<true, true, false>(
                &le,
                &mut rgba16_from_le,
                w,
                ColorMatrix::Bt709,
                true,
            );
            ayuv64_to_rgb_u16_or_rgba_u16_row::<true, true, true>(
                &be,
                &mut rgba16_from_be,
                w,
                ColorMatrix::Bt709,
                true,
            );
        }
        assert_eq!(
            rgba16_from_le, rgba16_from_be,
            "avx2 ayuv64 BE-vs-LE SIMD parity failed (rgba u16+srcα, w={w})"
        );

        // --- 8-bit luma ---
        let mut luma_from_le = std::vec![0u8; w];
        let mut luma_from_be = std::vec![0u8; w];
        // SAFETY: AVX2 was detected above; inputs hold `w * 4` samples and
        // outputs hold `w` bytes.
        unsafe {
            ayuv64_to_luma_row::<false>(&le, &mut luma_from_le, w);
            ayuv64_to_luma_row::<true>(&be, &mut luma_from_be, w);
        }
        assert_eq!(
            luma_from_le, luma_from_be,
            "avx2 ayuv64 BE-vs-LE SIMD parity failed (luma u8, w={w})"
        );

        // --- 16-bit luma ---
        let mut luma16_from_le = std::vec![0u16; w];
        let mut luma16_from_be = std::vec![0u16; w];
        // SAFETY: AVX2 was detected above; inputs hold `w * 4` samples and
        // outputs hold `w` samples.
        unsafe {
            ayuv64_to_luma_u16_row::<false>(&le, &mut luma16_from_le, w);
            ayuv64_to_luma_u16_row::<true>(&be, &mut luma16_from_be, w);
        }
        assert_eq!(
            luma16_from_le, luma16_from_be,
            "avx2 ayuv64 BE-vs-LE SIMD parity failed (luma u16, w={w})"
        );
    }
}