use super::super::*;
use crate::{ColorMatrix, row::scalar};
/// Builds a deterministic pseudo-random buffer of `width * 2` `u16` samples.
///
/// Element `i` is the low 16 bits of `i * seed + seed * 3`. Every step uses
/// wrapping arithmetic, so no `seed` value can trigger an overflow panic in
/// builds with overflow checks enabled.
fn pseudo_random_y216(width: usize, seed: usize) -> std::vec::Vec<u16> {
    // Loop-invariant offset; wrapping to match the multiply/add in the closure.
    let offset = seed.wrapping_mul(3);
    (0..width * 2)
        .map(|i| (i.wrapping_mul(seed).wrapping_add(offset) & 0xFFFF) as u16)
        .collect()
}
/// Converts one pseudo-random row with both the scalar reference and the
/// unqualified `y216_to_rgb_or_rgba_row` kernel, then asserts the `u8` outputs
/// are identical.
///
/// `ALPHA` selects 4 output bytes per pixel instead of 3; the second const
/// parameter of both kernels is fixed to `false`.
fn check_rgb<const ALPHA: bool>(width: usize, matrix: ColorMatrix, full_range: bool) {
    let out_len = width * if ALPHA { 4 } else { 3 };
    let input = pseudo_random_y216(width, 0xAA55);

    let mut reference = std::vec![0u8; out_len];
    scalar::y216_to_rgb_or_rgba_row::<ALPHA, false>(
        &input,
        &mut reference,
        width,
        matrix,
        full_range,
    );

    let mut simd = std::vec![0u8; out_len];
    // SAFETY: every caller in this file returns early unless AVX2 is detected.
    // NOTE(review): assumes AVX2 availability is the kernel's only precondition
    // beyond the buffer sizes used here — confirm.
    unsafe {
        y216_to_rgb_or_rgba_row::<ALPHA, false>(&input, &mut simd, width, matrix, full_range);
    }

    assert_eq!(
        reference,
        simd,
        "AVX2 y216<ALPHA={ALPHA}>→{} diverges (width={width}, matrix={matrix:?}, full_range={full_range})",
        if ALPHA { "RGBA" } else { "RGB" }
    );
}
/// Converts one pseudo-random row with both the scalar reference and the
/// unqualified `y216_to_rgb_u16_or_rgba_u16_row` kernel, then asserts the
/// `u16` outputs are identical.
///
/// `ALPHA` selects 4 output elements per pixel instead of 3; the second const
/// parameter of both kernels is fixed to `false`.
fn check_rgb_u16<const ALPHA: bool>(width: usize, matrix: ColorMatrix, full_range: bool) {
    let out_len = width * if ALPHA { 4 } else { 3 };
    let input = pseudo_random_y216(width, 0xAA55);

    let mut reference = std::vec![0u16; out_len];
    scalar::y216_to_rgb_u16_or_rgba_u16_row::<ALPHA, false>(
        &input,
        &mut reference,
        width,
        matrix,
        full_range,
    );

    let mut simd = std::vec![0u16; out_len];
    // SAFETY: every caller in this file returns early unless AVX2 is detected.
    // NOTE(review): assumes AVX2 availability is the kernel's only precondition
    // beyond the buffer sizes used here — confirm.
    unsafe {
        y216_to_rgb_u16_or_rgba_u16_row::<ALPHA, false>(
            &input,
            &mut simd,
            width,
            matrix,
            full_range,
        );
    }

    assert_eq!(
        reference,
        simd,
        "AVX2 y216<ALPHA={ALPHA}>→{} u16 diverges (width={width}, matrix={matrix:?}, full_range={full_range})",
        if ALPHA { "RGBA" } else { "RGB" }
    );
}
/// Extracts `u8` luma from one pseudo-random row with both the scalar
/// reference and the unqualified `y216_to_luma_row` kernel, then asserts the
/// two outputs are identical.
fn check_luma(width: usize) {
    let input = pseudo_random_y216(width, 0xC001);

    let mut reference = std::vec![0u8; width];
    scalar::y216_to_luma_row::<false>(&input, &mut reference, width);

    let mut simd = std::vec![0u8; width];
    // SAFETY: every caller in this file returns early unless AVX2 is detected.
    // NOTE(review): assumes AVX2 availability is the kernel's only precondition
    // beyond the buffer sizes used here — confirm.
    unsafe {
        y216_to_luma_row::<false>(&input, &mut simd, width);
    }

    assert_eq!(
        reference, simd,
        "AVX2 y216→luma u8 diverges (width={width})"
    );
}
/// Extracts `u16` luma from one pseudo-random row with both the scalar
/// reference and the unqualified `y216_to_luma_u16_row` kernel, then asserts
/// the two outputs are identical.
fn check_luma_u16(width: usize) {
    let input = pseudo_random_y216(width, 0xC001);

    let mut reference = std::vec![0u16; width];
    scalar::y216_to_luma_u16_row::<false>(&input, &mut reference, width);

    let mut simd = std::vec![0u16; width];
    // SAFETY: every caller in this file returns early unless AVX2 is detected.
    // NOTE(review): assumes AVX2 availability is the kernel's only precondition
    // beyond the buffer sizes used here — confirm.
    unsafe {
        y216_to_luma_u16_row::<false>(&input, &mut simd, width);
    }

    assert_eq!(
        reference, simd,
        "AVX2 y216→luma u16 diverges (width={width})"
    );
}
/// Runs the `u8` and `u16` RGB/RGBA comparisons at width 32 for every listed
/// colour matrix in both full-range and limited-range modes.
#[test]
#[cfg_attr(
    miri,
    ignore = "SIMD-dispatched row kernels use intrinsics unsupported by Miri"
)]
fn avx2_y216_rgb_matches_scalar_all_matrices() {
    const WIDTH: usize = 32;

    // Nothing to compare on hosts without AVX2.
    let has_avx2 = std::arch::is_x86_feature_detected!("avx2");
    if !has_avx2 {
        return;
    }

    let matrices = [
        ColorMatrix::Bt601,
        ColorMatrix::Bt709,
        ColorMatrix::Bt2020Ncl,
        ColorMatrix::Smpte240m,
        ColorMatrix::Fcc,
        ColorMatrix::YCgCo,
    ];
    for matrix in matrices {
        for full_range in [true, false] {
            check_rgb::<false>(WIDTH, matrix, full_range);
            check_rgb::<true>(WIDTH, matrix, full_range);
            check_rgb_u16::<false>(WIDTH, matrix, full_range);
            check_rgb_u16::<true>(WIDTH, matrix, full_range);
        }
    }
}
/// Runs the RGB/RGBA comparisons across a spread of row widths, using a
/// different matrix / range combination for each output flavour.
#[test]
#[cfg_attr(
    miri,
    ignore = "SIMD-dispatched row kernels use intrinsics unsupported by Miri"
)]
fn avx2_y216_matches_scalar_widths() {
    const WIDTHS: [usize; 10] = [16, 18, 32, 34, 62, 64, 66, 128, 1920, 1922];

    // Nothing to compare on hosts without AVX2.
    let has_avx2 = std::arch::is_x86_feature_detected!("avx2");
    if !has_avx2 {
        return;
    }

    for width in WIDTHS {
        check_rgb::<false>(width, ColorMatrix::Bt709, false);
        check_rgb::<true>(width, ColorMatrix::Bt709, true);
        check_rgb_u16::<false>(width, ColorMatrix::Bt2020Ncl, true);
        check_rgb_u16::<true>(width, ColorMatrix::Bt601, false);
    }
}
/// Runs the `u8` and `u16` luma comparisons across a spread of row widths.
#[test]
#[cfg_attr(
    miri,
    ignore = "SIMD-dispatched row kernels use intrinsics unsupported by Miri"
)]
fn avx2_y216_luma_matches_scalar_widths() {
    const WIDTHS: [usize; 10] = [16, 18, 32, 34, 62, 64, 66, 128, 1920, 1922];

    // Nothing to compare on hosts without AVX2.
    let has_avx2 = std::arch::is_x86_feature_detected!("avx2");
    if !has_avx2 {
        return;
    }

    for width in WIDTHS {
        check_luma(width);
        check_luma_u16(width);
    }
}
/// Builds a `width * 2`-element packed row in which every complete group of
/// four `u16` slots, numbered `k = 0, 1, …`, holds
/// `[2k + 1, 2k + 1, 2k + 2, 0x8000]`.
///
/// Going by the function name, the slots are Y0, U, Y1 and a neutral V. Only
/// `width / 2` groups are written; any trailing slots stay zero.
fn build_y216_packed_y_n_plus_1_u_2k_plus_1_v_neutral(width: usize) -> std::vec::Vec<u16> {
    let mut row = std::vec![0u16; width * 2];
    for (pair, quad) in row.chunks_exact_mut(4).enumerate() {
        let first_luma = 2 * (pair as u16) + 1;
        let second_luma = 2 * (pair as u16) + 2;
        let chroma_u = 2 * (pair as u16) + 1;
        quad.copy_from_slice(&[first_luma, chroma_u, second_luma, 0x8000]);
    }
    row
}
/// Feeds the kernels a row whose luma values are `1..=W` in pixel order, so
/// any lane reordering shows up as a mismatch.
///
/// First checks the `u16` luma output against the literal sequence, then
/// checks the `u16` RGB output against the scalar reference.
#[test]
#[cfg_attr(
    miri,
    ignore = "SIMD-dispatched row kernels use intrinsics unsupported by Miri"
)]
fn avx2_y216_lane_order_per_pixel_y_and_u() {
    const W: usize = 32;

    // Nothing to compare on hosts without AVX2.
    let has_avx2 = std::arch::is_x86_feature_detected!("avx2");
    if !has_avx2 {
        return;
    }

    let input = build_y216_packed_y_n_plus_1_u_2k_plus_1_v_neutral(W);

    // Luma: output must be exactly 1, 2, …, W.
    let want_luma: std::vec::Vec<u16> = (1..=W as u16).collect();
    let mut got_luma = std::vec![0u16; W];
    // SAFETY: AVX2 availability was checked above.
    // NOTE(review): assumes that is the kernel's only precondition beyond the
    // buffer sizes used here — confirm.
    unsafe {
        y216_to_luma_u16_row::<false>(&input, &mut got_luma, W);
    }
    assert_eq!(got_luma, want_luma, "AVX2 y216 luma_u16 reorder bug");

    // RGB: the SIMD result must match the scalar reference element for element.
    let mut got_rgb = std::vec![0u16; W * 3];
    let mut want_rgb = std::vec![0u16; W * 3];
    // SAFETY: AVX2 availability was checked above (same assumption as before).
    unsafe {
        y216_to_rgb_u16_or_rgba_u16_row::<false, false>(
            &input,
            &mut got_rgb,
            W,
            ColorMatrix::Bt709,
            false,
        );
    }
    scalar::y216_to_rgb_u16_or_rgba_u16_row::<false, false>(
        &input,
        &mut want_rgb,
        W,
        ColorMatrix::Bt709,
        false,
    );
    assert_eq!(
        got_rgb, want_rgb,
        "AVX2 y216 SIMD vs scalar diverges (u16 RGB, i64 chroma)"
    );
}