#[cfg(any(
target_arch = "aarch64",
target_arch = "x86_64",
target_arch = "wasm32"
))]
use crate::row::arch;
#[cfg(target_arch = "aarch64")]
use crate::row::neon_available;
#[cfg(target_arch = "wasm32")]
use crate::row::simd128_available;
#[cfg(target_arch = "x86_64")]
use crate::row::{avx2_available, avx512_available, sse41_available};
use crate::{
ColorMatrix,
row::{rgb_row_bytes, rgb_row_elems, rgba_row_bytes, rgba_row_elems, scalar},
};
/// Returns the number of `u16` elements a packed AYUV64 row of `width`
/// pixels occupies, i.e. `width * 4`.
///
/// # Panics
///
/// Panics if `width * 4` does not fit in a `usize`.
#[cfg_attr(not(tarpaulin), inline(always))]
fn ayuv64_packed_elems(width: usize) -> usize {
    // Checked multiply so an absurd width fails loudly instead of wrapping
    // into a length that would pass the callers' bounds assertions.
    let Some(elems) = width.checked_mul(4) else {
        panic!("width ({width}) x 4 overflows usize (AYUV64 packed row)");
    };
    elems
}
/// Converts one packed AYUV64 row to 8-bit RGB, selecting a kernel at runtime.
///
/// With `use_simd` set, the first architecture-specific kernel whose CPU
/// feature probe succeeds handles the row: NEON on aarch64; AVX-512, then
/// AVX2, then SSE4.1 on x86_64; simd128 on wasm32. In every other case the
/// scalar implementation runs.
///
/// * `packed` - source row; must hold at least `width * 4` `u16` elements
///   (the tests in this file build each pixel as an A, Y, U, V quad).
/// * `rgb_out` - destination; must hold at least `rgb_row_bytes(width)` bytes.
/// * `width` - number of pixels in the row.
/// * `matrix`, `full_range` - forwarded unchanged to the selected kernel.
/// * `use_simd` - `false` skips the SIMD probes and forces the scalar path.
/// * `be_input` - selects the kernel's `<true>` instantiation; the tests in
///   this file set it for rows whose samples are stored big-endian.
///
/// # Panics
///
/// Panics with "packed row too short" or "rgb_out row too short" when a slice
/// is undersized, and when `width * 4` overflows `usize`.
#[cfg_attr(not(tarpaulin), inline(always))]
pub fn ayuv64_to_rgb_row(
    packed: &[u16],
    rgb_out: &mut [u8],
    width: usize,
    matrix: ColorMatrix,
    full_range: bool,
    use_simd: bool,
    be_input: bool,
) {
    // The source length is validated before the destination size is computed.
    let src_needed = ayuv64_packed_elems(width);
    assert!(packed.len() >= src_needed, "packed row too short");
    let dst_needed = rgb_row_bytes(width);
    assert!(rgb_out.len() >= dst_needed, "rgb_out row too short");

    if use_simd {
        // NOTE(review): the kernels' safety contracts are not visible from this
        // file; the SAFETY notes below assume they need only the probed CPU
        // feature plus the slice lengths asserted above — confirm in `arch`.
        cfg_select! {
            target_arch = "aarch64" => {
                if neon_available() {
                    // SAFETY: NEON reported available; slice lengths asserted on entry.
                    match be_input {
                        true => unsafe { arch::neon::ayuv64_to_rgb_row::<true>(packed, rgb_out, width, matrix, full_range); },
                        false => unsafe { arch::neon::ayuv64_to_rgb_row::<false>(packed, rgb_out, width, matrix, full_range); },
                    }
                    return;
                }
            },
            target_arch = "x86_64" => {
                // Probed widest-first: AVX-512, then AVX2, then SSE4.1.
                if avx512_available() {
                    // SAFETY: AVX-512 reported available; slice lengths asserted on entry.
                    match be_input {
                        true => unsafe { arch::x86_avx512::ayuv64_to_rgb_row::<true>(packed, rgb_out, width, matrix, full_range); },
                        false => unsafe { arch::x86_avx512::ayuv64_to_rgb_row::<false>(packed, rgb_out, width, matrix, full_range); },
                    }
                    return;
                }
                if avx2_available() {
                    // SAFETY: AVX2 reported available; slice lengths asserted on entry.
                    match be_input {
                        true => unsafe { arch::x86_avx2::ayuv64_to_rgb_row::<true>(packed, rgb_out, width, matrix, full_range); },
                        false => unsafe { arch::x86_avx2::ayuv64_to_rgb_row::<false>(packed, rgb_out, width, matrix, full_range); },
                    }
                    return;
                }
                if sse41_available() {
                    // SAFETY: SSE4.1 reported available; slice lengths asserted on entry.
                    match be_input {
                        true => unsafe { arch::x86_sse41::ayuv64_to_rgb_row::<true>(packed, rgb_out, width, matrix, full_range); },
                        false => unsafe { arch::x86_sse41::ayuv64_to_rgb_row::<false>(packed, rgb_out, width, matrix, full_range); },
                    }
                    return;
                }
            },
            target_arch = "wasm32" => {
                if simd128_available() {
                    // SAFETY: simd128 reported available; slice lengths asserted on entry.
                    match be_input {
                        true => unsafe { arch::wasm_simd128::ayuv64_to_rgb_row::<true>(packed, rgb_out, width, matrix, full_range); },
                        false => unsafe { arch::wasm_simd128::ayuv64_to_rgb_row::<false>(packed, rgb_out, width, matrix, full_range); },
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // Scalar fallback: SIMD disabled, unsupported target, or no feature detected.
    match be_input {
        true => {
            scalar::ayuv64_to_rgb_row::<true>(packed, rgb_out, width, matrix, full_range);
        }
        false => {
            scalar::ayuv64_to_rgb_row::<false>(packed, rgb_out, width, matrix, full_range);
        }
    }
}
/// Converts one packed AYUV64 row to 8-bit RGBA, selecting a kernel at runtime.
///
/// With `use_simd` set, the first architecture-specific kernel whose CPU
/// feature probe succeeds handles the row: NEON on aarch64; AVX-512, then
/// AVX2, then SSE4.1 on x86_64; simd128 on wasm32. In every other case the
/// scalar implementation runs.
///
/// * `packed` - source row; must hold at least `width * 4` `u16` elements
///   (the tests in this file build each pixel as an A, Y, U, V quad).
/// * `rgba_out` - destination; must hold at least `rgba_row_bytes(width)`
///   bytes. The tests in this file expect the output alpha byte to be the
///   source alpha shifted right by 8.
/// * `width` - number of pixels in the row.
/// * `matrix`, `full_range` - forwarded unchanged to the selected kernel.
/// * `use_simd` - `false` skips the SIMD probes and forces the scalar path.
/// * `be_input` - selects the kernel's `<true>` instantiation; the tests in
///   this file set it for rows whose samples are stored big-endian.
///
/// # Panics
///
/// Panics with "packed row too short" or "rgba_out row too short" when a
/// slice is undersized, and when `width * 4` overflows `usize`.
#[cfg_attr(not(tarpaulin), inline(always))]
pub fn ayuv64_to_rgba_row(
    packed: &[u16],
    rgba_out: &mut [u8],
    width: usize,
    matrix: ColorMatrix,
    full_range: bool,
    use_simd: bool,
    be_input: bool,
) {
    // The source length is validated before the destination size is computed.
    let src_needed = ayuv64_packed_elems(width);
    assert!(packed.len() >= src_needed, "packed row too short");
    let dst_needed = rgba_row_bytes(width);
    assert!(rgba_out.len() >= dst_needed, "rgba_out row too short");

    if use_simd {
        // NOTE(review): the kernels' safety contracts are not visible from this
        // file; the SAFETY notes below assume they need only the probed CPU
        // feature plus the slice lengths asserted above — confirm in `arch`.
        cfg_select! {
            target_arch = "aarch64" => {
                if neon_available() {
                    // SAFETY: NEON reported available; slice lengths asserted on entry.
                    match be_input {
                        true => unsafe { arch::neon::ayuv64_to_rgba_row::<true>(packed, rgba_out, width, matrix, full_range); },
                        false => unsafe { arch::neon::ayuv64_to_rgba_row::<false>(packed, rgba_out, width, matrix, full_range); },
                    }
                    return;
                }
            },
            target_arch = "x86_64" => {
                // Probed widest-first: AVX-512, then AVX2, then SSE4.1.
                if avx512_available() {
                    // SAFETY: AVX-512 reported available; slice lengths asserted on entry.
                    match be_input {
                        true => unsafe { arch::x86_avx512::ayuv64_to_rgba_row::<true>(packed, rgba_out, width, matrix, full_range); },
                        false => unsafe { arch::x86_avx512::ayuv64_to_rgba_row::<false>(packed, rgba_out, width, matrix, full_range); },
                    }
                    return;
                }
                if avx2_available() {
                    // SAFETY: AVX2 reported available; slice lengths asserted on entry.
                    match be_input {
                        true => unsafe { arch::x86_avx2::ayuv64_to_rgba_row::<true>(packed, rgba_out, width, matrix, full_range); },
                        false => unsafe { arch::x86_avx2::ayuv64_to_rgba_row::<false>(packed, rgba_out, width, matrix, full_range); },
                    }
                    return;
                }
                if sse41_available() {
                    // SAFETY: SSE4.1 reported available; slice lengths asserted on entry.
                    match be_input {
                        true => unsafe { arch::x86_sse41::ayuv64_to_rgba_row::<true>(packed, rgba_out, width, matrix, full_range); },
                        false => unsafe { arch::x86_sse41::ayuv64_to_rgba_row::<false>(packed, rgba_out, width, matrix, full_range); },
                    }
                    return;
                }
            },
            target_arch = "wasm32" => {
                if simd128_available() {
                    // SAFETY: simd128 reported available; slice lengths asserted on entry.
                    match be_input {
                        true => unsafe { arch::wasm_simd128::ayuv64_to_rgba_row::<true>(packed, rgba_out, width, matrix, full_range); },
                        false => unsafe { arch::wasm_simd128::ayuv64_to_rgba_row::<false>(packed, rgba_out, width, matrix, full_range); },
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // Scalar fallback: SIMD disabled, unsupported target, or no feature detected.
    match be_input {
        true => {
            scalar::ayuv64_to_rgba_row::<true>(packed, rgba_out, width, matrix, full_range);
        }
        false => {
            scalar::ayuv64_to_rgba_row::<false>(packed, rgba_out, width, matrix, full_range);
        }
    }
}
/// Converts one packed AYUV64 row to 16-bit RGB, selecting a kernel at runtime.
///
/// With `use_simd` set, the first architecture-specific kernel whose CPU
/// feature probe succeeds handles the row: NEON on aarch64; AVX-512, then
/// AVX2, then SSE4.1 on x86_64; simd128 on wasm32. In every other case the
/// scalar implementation runs.
///
/// * `packed` - source row; must hold at least `width * 4` `u16` elements
///   (the tests in this file build each pixel as an A, Y, U, V quad).
/// * `rgb_out` - destination; must hold at least `rgb_row_elems(width)`
///   `u16` elements.
/// * `width` - number of pixels in the row.
/// * `matrix`, `full_range` - forwarded unchanged to the selected kernel.
/// * `use_simd` - `false` skips the SIMD probes and forces the scalar path.
/// * `be_input` - selects the kernel's `<true>` instantiation; the tests in
///   this file set it for rows whose samples are stored big-endian.
///
/// # Panics
///
/// Panics with "packed row too short" or "rgb_out row too short" when a slice
/// is undersized, and when `width * 4` overflows `usize`.
#[cfg_attr(not(tarpaulin), inline(always))]
pub fn ayuv64_to_rgb_u16_row(
    packed: &[u16],
    rgb_out: &mut [u16],
    width: usize,
    matrix: ColorMatrix,
    full_range: bool,
    use_simd: bool,
    be_input: bool,
) {
    // The source length is validated before the destination size is computed.
    let src_needed = ayuv64_packed_elems(width);
    assert!(packed.len() >= src_needed, "packed row too short");
    let dst_needed = rgb_row_elems(width);
    assert!(rgb_out.len() >= dst_needed, "rgb_out row too short");

    if use_simd {
        // NOTE(review): the kernels' safety contracts are not visible from this
        // file; the SAFETY notes below assume they need only the probed CPU
        // feature plus the slice lengths asserted above — confirm in `arch`.
        cfg_select! {
            target_arch = "aarch64" => {
                if neon_available() {
                    // SAFETY: NEON reported available; slice lengths asserted on entry.
                    match be_input {
                        true => unsafe { arch::neon::ayuv64_to_rgb_u16_row::<true>(packed, rgb_out, width, matrix, full_range); },
                        false => unsafe { arch::neon::ayuv64_to_rgb_u16_row::<false>(packed, rgb_out, width, matrix, full_range); },
                    }
                    return;
                }
            },
            target_arch = "x86_64" => {
                // Probed widest-first: AVX-512, then AVX2, then SSE4.1.
                if avx512_available() {
                    // SAFETY: AVX-512 reported available; slice lengths asserted on entry.
                    match be_input {
                        true => unsafe { arch::x86_avx512::ayuv64_to_rgb_u16_row::<true>(packed, rgb_out, width, matrix, full_range); },
                        false => unsafe { arch::x86_avx512::ayuv64_to_rgb_u16_row::<false>(packed, rgb_out, width, matrix, full_range); },
                    }
                    return;
                }
                if avx2_available() {
                    // SAFETY: AVX2 reported available; slice lengths asserted on entry.
                    match be_input {
                        true => unsafe { arch::x86_avx2::ayuv64_to_rgb_u16_row::<true>(packed, rgb_out, width, matrix, full_range); },
                        false => unsafe { arch::x86_avx2::ayuv64_to_rgb_u16_row::<false>(packed, rgb_out, width, matrix, full_range); },
                    }
                    return;
                }
                if sse41_available() {
                    // SAFETY: SSE4.1 reported available; slice lengths asserted on entry.
                    match be_input {
                        true => unsafe { arch::x86_sse41::ayuv64_to_rgb_u16_row::<true>(packed, rgb_out, width, matrix, full_range); },
                        false => unsafe { arch::x86_sse41::ayuv64_to_rgb_u16_row::<false>(packed, rgb_out, width, matrix, full_range); },
                    }
                    return;
                }
            },
            target_arch = "wasm32" => {
                if simd128_available() {
                    // SAFETY: simd128 reported available; slice lengths asserted on entry.
                    match be_input {
                        true => unsafe { arch::wasm_simd128::ayuv64_to_rgb_u16_row::<true>(packed, rgb_out, width, matrix, full_range); },
                        false => unsafe { arch::wasm_simd128::ayuv64_to_rgb_u16_row::<false>(packed, rgb_out, width, matrix, full_range); },
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // Scalar fallback: SIMD disabled, unsupported target, or no feature detected.
    match be_input {
        true => {
            scalar::ayuv64_to_rgb_u16_row::<true>(packed, rgb_out, width, matrix, full_range);
        }
        false => {
            scalar::ayuv64_to_rgb_u16_row::<false>(packed, rgb_out, width, matrix, full_range);
        }
    }
}
/// Converts one packed AYUV64 row to 16-bit RGBA, selecting a kernel at runtime.
///
/// With `use_simd` set, the first architecture-specific kernel whose CPU
/// feature probe succeeds handles the row: NEON on aarch64; AVX-512, then
/// AVX2, then SSE4.1 on x86_64; simd128 on wasm32. In every other case the
/// scalar implementation runs.
///
/// * `packed` - source row; must hold at least `width * 4` `u16` elements
///   (the tests in this file build each pixel as an A, Y, U, V quad).
/// * `rgba_out` - destination; must hold at least `rgba_row_elems(width)`
///   `u16` elements. The tests in this file expect the output alpha to equal
///   the source alpha unchanged.
/// * `width` - number of pixels in the row.
/// * `matrix`, `full_range` - forwarded unchanged to the selected kernel.
/// * `use_simd` - `false` skips the SIMD probes and forces the scalar path.
/// * `be_input` - selects the kernel's `<true>` instantiation; the tests in
///   this file set it for rows whose samples are stored big-endian.
///
/// # Panics
///
/// Panics with "packed row too short" or "rgba_out row too short" when a
/// slice is undersized, and when `width * 4` overflows `usize`.
#[cfg_attr(not(tarpaulin), inline(always))]
pub fn ayuv64_to_rgba_u16_row(
    packed: &[u16],
    rgba_out: &mut [u16],
    width: usize,
    matrix: ColorMatrix,
    full_range: bool,
    use_simd: bool,
    be_input: bool,
) {
    // The source length is validated before the destination size is computed.
    let src_needed = ayuv64_packed_elems(width);
    assert!(packed.len() >= src_needed, "packed row too short");
    let dst_needed = rgba_row_elems(width);
    assert!(rgba_out.len() >= dst_needed, "rgba_out row too short");

    if use_simd {
        // NOTE(review): the kernels' safety contracts are not visible from this
        // file; the SAFETY notes below assume they need only the probed CPU
        // feature plus the slice lengths asserted above — confirm in `arch`.
        cfg_select! {
            target_arch = "aarch64" => {
                if neon_available() {
                    // SAFETY: NEON reported available; slice lengths asserted on entry.
                    match be_input {
                        true => unsafe { arch::neon::ayuv64_to_rgba_u16_row::<true>(packed, rgba_out, width, matrix, full_range); },
                        false => unsafe { arch::neon::ayuv64_to_rgba_u16_row::<false>(packed, rgba_out, width, matrix, full_range); },
                    }
                    return;
                }
            },
            target_arch = "x86_64" => {
                // Probed widest-first: AVX-512, then AVX2, then SSE4.1.
                if avx512_available() {
                    // SAFETY: AVX-512 reported available; slice lengths asserted on entry.
                    match be_input {
                        true => unsafe { arch::x86_avx512::ayuv64_to_rgba_u16_row::<true>(packed, rgba_out, width, matrix, full_range); },
                        false => unsafe { arch::x86_avx512::ayuv64_to_rgba_u16_row::<false>(packed, rgba_out, width, matrix, full_range); },
                    }
                    return;
                }
                if avx2_available() {
                    // SAFETY: AVX2 reported available; slice lengths asserted on entry.
                    match be_input {
                        true => unsafe { arch::x86_avx2::ayuv64_to_rgba_u16_row::<true>(packed, rgba_out, width, matrix, full_range); },
                        false => unsafe { arch::x86_avx2::ayuv64_to_rgba_u16_row::<false>(packed, rgba_out, width, matrix, full_range); },
                    }
                    return;
                }
                if sse41_available() {
                    // SAFETY: SSE4.1 reported available; slice lengths asserted on entry.
                    match be_input {
                        true => unsafe { arch::x86_sse41::ayuv64_to_rgba_u16_row::<true>(packed, rgba_out, width, matrix, full_range); },
                        false => unsafe { arch::x86_sse41::ayuv64_to_rgba_u16_row::<false>(packed, rgba_out, width, matrix, full_range); },
                    }
                    return;
                }
            },
            target_arch = "wasm32" => {
                if simd128_available() {
                    // SAFETY: simd128 reported available; slice lengths asserted on entry.
                    match be_input {
                        true => unsafe { arch::wasm_simd128::ayuv64_to_rgba_u16_row::<true>(packed, rgba_out, width, matrix, full_range); },
                        false => unsafe { arch::wasm_simd128::ayuv64_to_rgba_u16_row::<false>(packed, rgba_out, width, matrix, full_range); },
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // Scalar fallback: SIMD disabled, unsupported target, or no feature detected.
    match be_input {
        true => {
            scalar::ayuv64_to_rgba_u16_row::<true>(packed, rgba_out, width, matrix, full_range);
        }
        false => {
            scalar::ayuv64_to_rgba_u16_row::<false>(packed, rgba_out, width, matrix, full_range);
        }
    }
}
/// Extracts 8-bit luma from one packed AYUV64 row, selecting a kernel at runtime.
///
/// With `use_simd` set, the first architecture-specific kernel whose CPU
/// feature probe succeeds handles the row: NEON on aarch64; AVX-512, then
/// AVX2, then SSE4.1 on x86_64; simd128 on wasm32. In every other case the
/// scalar implementation runs.
///
/// * `packed` - source row; must hold at least `width * 4` `u16` elements
///   (the tests in this file build each pixel as an A, Y, U, V quad).
/// * `luma_out` - destination; must hold at least `width` bytes. The tests in
///   this file expect each byte to be the pixel's Y sample shifted right by 8.
/// * `width` - number of pixels in the row.
/// * `use_simd` - `false` skips the SIMD probes and forces the scalar path.
/// * `be_input` - selects the kernel's `<true>` instantiation; the tests in
///   this file set it for rows whose samples are stored big-endian.
///
/// # Panics
///
/// Panics with "packed row too short" or "luma_out row too short" when a
/// slice is undersized, and when `width * 4` overflows `usize`.
#[cfg_attr(not(tarpaulin), inline(always))]
pub fn ayuv64_to_luma_row(
    packed: &[u16],
    luma_out: &mut [u8],
    width: usize,
    use_simd: bool,
    be_input: bool,
) {
    let src_needed = ayuv64_packed_elems(width);
    assert!(packed.len() >= src_needed, "packed row too short");
    assert!(luma_out.len() >= width, "luma_out row too short");

    if use_simd {
        // NOTE(review): the kernels' safety contracts are not visible from this
        // file; the SAFETY notes below assume they need only the probed CPU
        // feature plus the slice lengths asserted above — confirm in `arch`.
        cfg_select! {
            target_arch = "aarch64" => {
                if neon_available() {
                    // SAFETY: NEON reported available; slice lengths asserted on entry.
                    match be_input {
                        true => unsafe { arch::neon::ayuv64_to_luma_row::<true>(packed, luma_out, width); },
                        false => unsafe { arch::neon::ayuv64_to_luma_row::<false>(packed, luma_out, width); },
                    }
                    return;
                }
            },
            target_arch = "x86_64" => {
                // Probed widest-first: AVX-512, then AVX2, then SSE4.1.
                if avx512_available() {
                    // SAFETY: AVX-512 reported available; slice lengths asserted on entry.
                    match be_input {
                        true => unsafe { arch::x86_avx512::ayuv64_to_luma_row::<true>(packed, luma_out, width); },
                        false => unsafe { arch::x86_avx512::ayuv64_to_luma_row::<false>(packed, luma_out, width); },
                    }
                    return;
                }
                if avx2_available() {
                    // SAFETY: AVX2 reported available; slice lengths asserted on entry.
                    match be_input {
                        true => unsafe { arch::x86_avx2::ayuv64_to_luma_row::<true>(packed, luma_out, width); },
                        false => unsafe { arch::x86_avx2::ayuv64_to_luma_row::<false>(packed, luma_out, width); },
                    }
                    return;
                }
                if sse41_available() {
                    // SAFETY: SSE4.1 reported available; slice lengths asserted on entry.
                    match be_input {
                        true => unsafe { arch::x86_sse41::ayuv64_to_luma_row::<true>(packed, luma_out, width); },
                        false => unsafe { arch::x86_sse41::ayuv64_to_luma_row::<false>(packed, luma_out, width); },
                    }
                    return;
                }
            },
            target_arch = "wasm32" => {
                if simd128_available() {
                    // SAFETY: simd128 reported available; slice lengths asserted on entry.
                    match be_input {
                        true => unsafe { arch::wasm_simd128::ayuv64_to_luma_row::<true>(packed, luma_out, width); },
                        false => unsafe { arch::wasm_simd128::ayuv64_to_luma_row::<false>(packed, luma_out, width); },
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // Scalar fallback: SIMD disabled, unsupported target, or no feature detected.
    match be_input {
        true => {
            scalar::ayuv64_to_luma_row::<true>(packed, luma_out, width);
        }
        false => {
            scalar::ayuv64_to_luma_row::<false>(packed, luma_out, width);
        }
    }
}
/// Extracts 16-bit luma from one packed AYUV64 row, selecting a kernel at runtime.
///
/// With `use_simd` set, the first architecture-specific kernel whose CPU
/// feature probe succeeds handles the row: NEON on aarch64; AVX-512, then
/// AVX2, then SSE4.1 on x86_64; simd128 on wasm32. In every other case the
/// scalar implementation runs.
///
/// * `packed` - source row; must hold at least `width * 4` `u16` elements
///   (the tests in this file build each pixel as an A, Y, U, V quad).
/// * `luma_out` - destination; must hold at least `width` `u16` elements. The
///   tests in this file expect each element to equal the pixel's Y sample.
/// * `width` - number of pixels in the row.
/// * `use_simd` - `false` skips the SIMD probes and forces the scalar path.
/// * `be_input` - selects the kernel's `<true>` instantiation; the tests in
///   this file set it for rows whose samples are stored big-endian.
///
/// # Panics
///
/// Panics with "packed row too short" or "luma_out row too short" when a
/// slice is undersized, and when `width * 4` overflows `usize`.
#[cfg_attr(not(tarpaulin), inline(always))]
pub fn ayuv64_to_luma_u16_row(
    packed: &[u16],
    luma_out: &mut [u16],
    width: usize,
    use_simd: bool,
    be_input: bool,
) {
    let src_needed = ayuv64_packed_elems(width);
    assert!(packed.len() >= src_needed, "packed row too short");
    assert!(luma_out.len() >= width, "luma_out row too short");

    if use_simd {
        // NOTE(review): the kernels' safety contracts are not visible from this
        // file; the SAFETY notes below assume they need only the probed CPU
        // feature plus the slice lengths asserted above — confirm in `arch`.
        cfg_select! {
            target_arch = "aarch64" => {
                if neon_available() {
                    // SAFETY: NEON reported available; slice lengths asserted on entry.
                    match be_input {
                        true => unsafe { arch::neon::ayuv64_to_luma_u16_row::<true>(packed, luma_out, width); },
                        false => unsafe { arch::neon::ayuv64_to_luma_u16_row::<false>(packed, luma_out, width); },
                    }
                    return;
                }
            },
            target_arch = "x86_64" => {
                // Probed widest-first: AVX-512, then AVX2, then SSE4.1.
                if avx512_available() {
                    // SAFETY: AVX-512 reported available; slice lengths asserted on entry.
                    match be_input {
                        true => unsafe { arch::x86_avx512::ayuv64_to_luma_u16_row::<true>(packed, luma_out, width); },
                        false => unsafe { arch::x86_avx512::ayuv64_to_luma_u16_row::<false>(packed, luma_out, width); },
                    }
                    return;
                }
                if avx2_available() {
                    // SAFETY: AVX2 reported available; slice lengths asserted on entry.
                    match be_input {
                        true => unsafe { arch::x86_avx2::ayuv64_to_luma_u16_row::<true>(packed, luma_out, width); },
                        false => unsafe { arch::x86_avx2::ayuv64_to_luma_u16_row::<false>(packed, luma_out, width); },
                    }
                    return;
                }
                if sse41_available() {
                    // SAFETY: SSE4.1 reported available; slice lengths asserted on entry.
                    match be_input {
                        true => unsafe { arch::x86_sse41::ayuv64_to_luma_u16_row::<true>(packed, luma_out, width); },
                        false => unsafe { arch::x86_sse41::ayuv64_to_luma_u16_row::<false>(packed, luma_out, width); },
                    }
                    return;
                }
            },
            target_arch = "wasm32" => {
                if simd128_available() {
                    // SAFETY: simd128 reported available; slice lengths asserted on entry.
                    match be_input {
                        true => unsafe { arch::wasm_simd128::ayuv64_to_luma_u16_row::<true>(packed, luma_out, width); },
                        false => unsafe { arch::wasm_simd128::ayuv64_to_luma_u16_row::<false>(packed, luma_out, width); },
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // Scalar fallback: SIMD disabled, unsupported target, or no feature detected.
    match be_input {
        true => {
            scalar::ayuv64_to_luma_u16_row::<true>(packed, luma_out, width);
        }
        false => {
            scalar::ayuv64_to_luma_u16_row::<false>(packed, luma_out, width);
        }
    }
}
#[cfg(all(test, feature = "std"))]
mod tests {
    use super::*;

    /// Pixel count of the rows used by the routing and endianness tests.
    const ROW_WIDTH: usize = 8;
    /// Luma sample used for the solid test rows (235 << 8).
    const SOLID_Y: u16 = 60160;
    /// Alpha sample used for the solid test rows; high and low bytes differ
    /// so a byte swap or a missing shift is detectable.
    const SOLID_A: u16 = 0xABCD;

    /// One AYUV64 pixel as host-order channel values, in A, Y, U, V order.
    fn pack_ayuv64(a: u16, y: u16, u: u16, v: u16) -> [u16; 4] {
        [a, y, u, v]
    }

    /// Builds `width` identical pixels with both chroma samples at 32768,
    /// every sample stored in little-endian byte order.
    fn solid_ayuv64(width: usize, y: u16, a: u16) -> std::vec::Vec<u16> {
        let mut row = std::vec::Vec::new();
        for _ in 0..width {
            row.extend(pack_ayuv64(a, y, 32768, 32768).map(u16::to_le));
        }
        row
    }

    /// Same row as [`solid_ayuv64`], but every sample stored in big-endian
    /// byte order.
    fn solid_ayuv64_be(width: usize, y: u16, a: u16) -> std::vec::Vec<u16> {
        let mut row = std::vec::Vec::new();
        for _ in 0..width {
            row.extend(pack_ayuv64(a, y, 32768, 32768).map(u16::to_be));
        }
        row
    }

    #[test]
    #[should_panic(expected = "packed row too short")]
    fn ayuv64_dispatcher_rejects_short_packed() {
        // Width 4 needs 16 source elements; only 8 are supplied.
        let src = [0u16; 8];
        let mut dst = [0u8; 4 * 3];
        ayuv64_to_rgb_row(&src, &mut dst, 4, ColorMatrix::Bt709, true, false, false);
    }

    #[test]
    #[should_panic(expected = "rgb_out row too short")]
    fn ayuv64_dispatcher_rejects_short_rgb_output() {
        let src = [0u16; 4 * 4];
        let mut dst = [0u8; 2];
        ayuv64_to_rgb_row(&src, &mut dst, 4, ColorMatrix::Bt709, true, false, false);
    }

    #[test]
    #[should_panic(expected = "rgba_out row too short")]
    fn ayuv64_dispatcher_rejects_short_rgba_output() {
        let src = [0u16; 4 * 4];
        let mut dst = [0u8; 2];
        ayuv64_to_rgba_row(&src, &mut dst, 4, ColorMatrix::Bt709, true, false, false);
    }

    #[test]
    #[should_panic(expected = "rgb_out row too short")]
    fn ayuv64_dispatcher_rejects_short_rgb_u16_output() {
        let src = [0u16; 4 * 4];
        let mut dst = [0u16; 2];
        ayuv64_to_rgb_u16_row(&src, &mut dst, 4, ColorMatrix::Bt709, true, false, false);
    }

    #[test]
    #[should_panic(expected = "rgba_out row too short")]
    fn ayuv64_dispatcher_rejects_short_rgba_u16_output() {
        let src = [0u16; 4 * 4];
        let mut dst = [0u16; 2];
        ayuv64_to_rgba_u16_row(&src, &mut dst, 4, ColorMatrix::Bt709, true, false, false);
    }

    #[test]
    #[should_panic(expected = "luma_out row too short")]
    fn ayuv64_dispatcher_rejects_short_luma_output() {
        let src = [0u16; 4 * 4];
        let mut dst = [0u8; 2];
        ayuv64_to_luma_row(&src, &mut dst, 4, false, false);
    }

    #[test]
    #[should_panic(expected = "luma_out row too short")]
    fn ayuv64_dispatcher_rejects_short_luma_u16_output() {
        let src = [0u16; 4 * 4];
        let mut dst = [0u16; 2];
        ayuv64_to_luma_u16_row(&src, &mut dst, 4, false, false);
    }

    /// Runs every dispatcher with `use_simd = false` on a solid little-endian
    /// row and checks the colour, alpha and luma values that come back.
    #[test]
    fn ayuv64_dispatchers_route_with_simd_false() {
        let src = solid_ayuv64(ROW_WIDTH, SOLID_Y, SOLID_A);

        // 8-bit RGB: neutral chroma must give equal channels near white.
        let mut rgb8 = [0u8; ROW_WIDTH * 3];
        ayuv64_to_rgb_row(&src, &mut rgb8, ROW_WIDTH, ColorMatrix::Bt709, false, false, false);
        for px in rgb8.chunks_exact(3) {
            let (r, g, b) = (px[0], px[1], px[2]);
            assert!(r.abs_diff(255) <= 2, "R near-white expected, got {}", r);
            assert_eq!(r, g, "R ≠ G for neutral chroma");
            assert_eq!(g, b, "G ≠ B for neutral chroma");
        }

        // 8-bit RGBA: alpha is the high byte of the source alpha.
        let mut rgba8 = [0u8; ROW_WIDTH * 4];
        ayuv64_to_rgba_row(&src, &mut rgba8, ROW_WIDTH, ColorMatrix::Bt709, false, false, false);
        for px in rgba8.chunks_exact(4) {
            let (r, alpha) = (px[0], px[3]);
            assert!(r.abs_diff(255) <= 2, "R near-white expected, got {}", r);
            assert_eq!(
                alpha,
                (SOLID_A >> 8) as u8,
                "source α must be depth-converted (>> 8) for u8 RGBA"
            );
        }

        // 16-bit RGB.
        let mut rgb16 = [0u16; ROW_WIDTH * 3];
        ayuv64_to_rgb_u16_row(&src, &mut rgb16, ROW_WIDTH, ColorMatrix::Bt709, false, false, false);
        for px in rgb16.chunks_exact(3) {
            let (r, g, b) = (px[0], px[1], px[2]);
            assert!(
                r.abs_diff(0xFFFF) <= 256,
                "R u16 near-white expected, got {}",
                r
            );
            assert_eq!(r, g, "R ≠ G for neutral chroma (u16)");
            assert_eq!(g, b, "G ≠ B for neutral chroma (u16)");
        }

        // 16-bit RGBA: alpha passes through untouched.
        let mut rgba16 = [0u16; ROW_WIDTH * 4];
        ayuv64_to_rgba_u16_row(&src, &mut rgba16, ROW_WIDTH, ColorMatrix::Bt709, false, false, false);
        for px in rgba16.chunks_exact(4) {
            assert_eq!(
                px[3], SOLID_A,
                "source α must be written direct for u16 RGBA"
            );
        }

        // 8-bit luma.
        let mut luma8 = [0u8; ROW_WIDTH];
        ayuv64_to_luma_row(&src, &mut luma8, ROW_WIDTH, false, false);
        for y in luma8.iter().copied() {
            assert_eq!(y, (SOLID_Y >> 8) as u8, "luma u8 must be Y >> 8");
        }

        // 16-bit luma.
        let mut luma16 = [0u16; ROW_WIDTH];
        ayuv64_to_luma_u16_row(&src, &mut luma16, ROW_WIDTH, false, false);
        for y in luma16.iter().copied() {
            assert_eq!(y, SOLID_Y, "luma u16 must be Y direct");
        }
    }

    /// Feeds the same pixels in little- and big-endian sample encodings (with
    /// `be_input` set to match) and requires identical output.
    #[test]
    fn ayuv64_be_and_le_dispatchers_agree() {
        let src_le = solid_ayuv64(ROW_WIDTH, SOLID_Y, SOLID_A);
        let src_be = solid_ayuv64_be(ROW_WIDTH, SOLID_Y, SOLID_A);

        let mut from_le = [0u8; ROW_WIDTH * 3];
        let mut from_be = [0u8; ROW_WIDTH * 3];
        ayuv64_to_rgb_row(&src_le, &mut from_le, ROW_WIDTH, ColorMatrix::Bt709, false, false, false);
        ayuv64_to_rgb_row(&src_be, &mut from_be, ROW_WIDTH, ColorMatrix::Bt709, false, false, true);
        assert_eq!(
            from_le, from_be,
            "LE and BE must produce identical RGB output"
        );

        let mut y_from_le = [0u8; ROW_WIDTH];
        let mut y_from_be = [0u8; ROW_WIDTH];
        ayuv64_to_luma_row(&src_le, &mut y_from_le, ROW_WIDTH, false, false);
        ayuv64_to_luma_row(&src_be, &mut y_from_be, ROW_WIDTH, false, true);
        assert_eq!(
            y_from_le, y_from_be,
            "LE and BE must produce identical luma output"
        );

        let mut wide_from_le = [0u16; ROW_WIDTH * 4];
        let mut wide_from_be = [0u16; ROW_WIDTH * 4];
        ayuv64_to_rgba_u16_row(&src_le, &mut wide_from_le, ROW_WIDTH, ColorMatrix::Bt709, false, false, false);
        ayuv64_to_rgba_u16_row(&src_be, &mut wide_from_be, ROW_WIDTH, ColorMatrix::Bt709, false, false, true);
        assert_eq!(
            wide_from_le, wide_from_be,
            "LE and BE must produce identical u16 RGBA output"
        );
    }

    /// Smallest width whose `* 4` no longer fits in `usize`.
    #[cfg(target_pointer_width = "32")]
    const OVERFLOW_WIDTH_TIMES_4: usize = usize::MAX / 4 + 1;

    #[cfg(target_pointer_width = "32")]
    #[test]
    #[should_panic(expected = "overflows usize")]
    fn ayuv64_dispatcher_rejects_width_times_4_overflow() {
        // Empty buffers suffice: the width overflow is hit while the first
        // length assertion is being evaluated.
        let src: [u16; 0] = [];
        let mut dst: [u8; 0] = [];
        ayuv64_to_rgb_row(
            &src,
            &mut dst,
            OVERFLOW_WIDTH_TIMES_4,
            ColorMatrix::Bt709,
            true,
            false,
            false,
        );
    }
}