#[cfg(any(
target_arch = "aarch64",
target_arch = "x86_64",
target_arch = "wasm32"
))]
use crate::row::arch;
#[cfg(target_arch = "aarch64")]
use crate::row::neon_available;
#[cfg(target_arch = "wasm32")]
use crate::row::simd128_available;
#[cfg(target_arch = "x86_64")]
use crate::row::{avx2_available, avx512_available, sse41_available};
use crate::{
ColorMatrix,
row::{rgb_row_bytes, rgb_row_elems, rgba_row_bytes, rgba_row_elems, scalar},
};
/// Converts one row of 10-bit planar YUV 4:2:0 samples into `u8` RGB output,
/// with the byte order of the input samples chosen at run time.
///
/// `big_endian` picks which kernel instantiation runs: `true` selects the
/// `<10, true>` variant, `false` the `<10, false>` variant.
///
/// When `use_simd` is set, the first available backend for the compile
/// target handles the row:
///
/// * `aarch64`: NEON
/// * `x86_64`: AVX-512, then AVX2, then SSE4.1
/// * `wasm32`: simd128
///
/// If `use_simd` is false, or no backend reports itself available, the
/// scalar kernel is used.
///
/// # Panics
///
/// Panics if `width` is odd, if `y` holds fewer than `width` samples, if
/// `u_half` or `v_half` holds fewer than `width / 2` samples, or if
/// `rgb_out` is shorter than `rgb_row_bytes(width)`.
#[cfg_attr(not(tarpaulin), inline(always))]
#[allow(clippy::too_many_arguments)]
pub fn yuv420p10_to_rgb_row_endian(
    y: &[u16],
    u_half: &[u16],
    v_half: &[u16],
    rgb_out: &mut [u8],
    width: usize,
    matrix: ColorMatrix,
    full_range: bool,
    use_simd: bool,
    big_endian: bool,
) {
    assert_eq!(width & 1, 0, "YUV 4:2:0 requires even width");
    let min_out_len = rgb_row_bytes(width);
    // Each chroma sample is shared by two horizontally adjacent pixels.
    let chroma_len = width / 2;
    assert!(y.len() >= width, "y row too short");
    assert!(u_half.len() >= chroma_len, "u_half row too short");
    assert!(v_half.len() >= chroma_len, "v_half row too short");
    assert!(rgb_out.len() >= min_out_len, "rgb_out row too short");

    if use_simd {
        cfg_select! {
            target_arch = "aarch64" => {
                if neon_available() {
                    // SAFETY (both calls): `neon_available()` returned true and all rows were
                    // length-checked above. NOTE(review): confirm against the kernel's contract.
                    if big_endian {
                        unsafe {
                            arch::neon::yuv_420p_n_to_rgb_row::<10, true>(
                                y, u_half, v_half, rgb_out, width, matrix, full_range,
                            );
                        }
                    } else {
                        unsafe {
                            arch::neon::yuv_420p_n_to_rgb_row::<10, false>(
                                y, u_half, v_half, rgb_out, width, matrix, full_range,
                            );
                        }
                    }
                    return;
                }
            },
            target_arch = "x86_64" => {
                // Widest vector tier first; each tier returns once it has handled the row.
                if avx512_available() {
                    // SAFETY (both calls): `avx512_available()` returned true and all rows were
                    // length-checked above. NOTE(review): confirm against the kernel's contract.
                    if big_endian {
                        unsafe {
                            arch::x86_avx512::yuv_420p_n_to_rgb_row::<10, true>(
                                y, u_half, v_half, rgb_out, width, matrix, full_range,
                            );
                        }
                    } else {
                        unsafe {
                            arch::x86_avx512::yuv_420p_n_to_rgb_row::<10, false>(
                                y, u_half, v_half, rgb_out, width, matrix, full_range,
                            );
                        }
                    }
                    return;
                }
                if avx2_available() {
                    // SAFETY (both calls): `avx2_available()` returned true and all rows were
                    // length-checked above. NOTE(review): confirm against the kernel's contract.
                    if big_endian {
                        unsafe {
                            arch::x86_avx2::yuv_420p_n_to_rgb_row::<10, true>(
                                y, u_half, v_half, rgb_out, width, matrix, full_range,
                            );
                        }
                    } else {
                        unsafe {
                            arch::x86_avx2::yuv_420p_n_to_rgb_row::<10, false>(
                                y, u_half, v_half, rgb_out, width, matrix, full_range,
                            );
                        }
                    }
                    return;
                }
                if sse41_available() {
                    // SAFETY (both calls): `sse41_available()` returned true and all rows were
                    // length-checked above. NOTE(review): confirm against the kernel's contract.
                    if big_endian {
                        unsafe {
                            arch::x86_sse41::yuv_420p_n_to_rgb_row::<10, true>(
                                y, u_half, v_half, rgb_out, width, matrix, full_range,
                            );
                        }
                    } else {
                        unsafe {
                            arch::x86_sse41::yuv_420p_n_to_rgb_row::<10, false>(
                                y, u_half, v_half, rgb_out, width, matrix, full_range,
                            );
                        }
                    }
                    return;
                }
            },
            target_arch = "wasm32" => {
                if simd128_available() {
                    // SAFETY (both calls): `simd128_available()` returned true and all rows were
                    // length-checked above. NOTE(review): confirm against the kernel's contract.
                    if big_endian {
                        unsafe {
                            arch::wasm_simd128::yuv_420p_n_to_rgb_row::<10, true>(
                                y, u_half, v_half, rgb_out, width, matrix, full_range,
                            );
                        }
                    } else {
                        unsafe {
                            arch::wasm_simd128::yuv_420p_n_to_rgb_row::<10, false>(
                                y, u_half, v_half, rgb_out, width, matrix, full_range,
                            );
                        }
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // Scalar fallback: SIMD was not requested or no backend was available.
    if big_endian {
        scalar::yuv_420p_n_to_rgb_row::<10, true>(
            y, u_half, v_half, rgb_out, width, matrix, full_range,
        );
    } else {
        scalar::yuv_420p_n_to_rgb_row::<10, false>(
            y, u_half, v_half, rgb_out, width, matrix, full_range,
        );
    }
}
/// Little-endian shorthand for [`yuv420p10_to_rgb_row_endian`].
///
/// Forwards every argument unchanged and passes `false` for `big_endian`,
/// so the same length and even-width preconditions (and panics) apply.
#[cfg_attr(not(tarpaulin), inline(always))]
#[allow(clippy::too_many_arguments)]
pub fn yuv420p10_to_rgb_row(
    y: &[u16],
    u_half: &[u16],
    v_half: &[u16],
    rgb_out: &mut [u8],
    width: usize,
    matrix: ColorMatrix,
    full_range: bool,
    use_simd: bool,
) {
    let big_endian = false;
    yuv420p10_to_rgb_row_endian(
        y,
        u_half,
        v_half,
        rgb_out,
        width,
        matrix,
        full_range,
        use_simd,
        big_endian,
    );
}
/// Converts one row of 10-bit planar YUV 4:2:0 samples into `u16` RGB output,
/// with the byte order of the input samples chosen at run time.
///
/// `big_endian` picks which kernel instantiation runs: `true` selects the
/// `<10, true>` variant, `false` the `<10, false>` variant.
///
/// When `use_simd` is set, the first available backend for the compile
/// target handles the row:
///
/// * `aarch64`: NEON
/// * `x86_64`: AVX-512, then AVX2, then SSE4.1
/// * `wasm32`: simd128
///
/// If `use_simd` is false, or no backend reports itself available, the
/// scalar kernel is used.
///
/// # Panics
///
/// Panics if `width` is odd, if `y` holds fewer than `width` samples, if
/// `u_half` or `v_half` holds fewer than `width / 2` samples, or if
/// `rgb_out` holds fewer than `rgb_row_elems(width)` elements.
#[cfg_attr(not(tarpaulin), inline(always))]
#[allow(clippy::too_many_arguments)]
pub fn yuv420p10_to_rgb_u16_row_endian(
    y: &[u16],
    u_half: &[u16],
    v_half: &[u16],
    rgb_out: &mut [u16],
    width: usize,
    matrix: ColorMatrix,
    full_range: bool,
    use_simd: bool,
    big_endian: bool,
) {
    assert_eq!(width & 1, 0, "YUV 4:2:0 requires even width");
    let min_out_len = rgb_row_elems(width);
    // Each chroma sample is shared by two horizontally adjacent pixels.
    let chroma_len = width / 2;
    assert!(y.len() >= width, "y row too short");
    assert!(u_half.len() >= chroma_len, "u_half row too short");
    assert!(v_half.len() >= chroma_len, "v_half row too short");
    assert!(rgb_out.len() >= min_out_len, "rgb_out row too short");

    if use_simd {
        cfg_select! {
            target_arch = "aarch64" => {
                if neon_available() {
                    // SAFETY (both calls): `neon_available()` returned true and all rows were
                    // length-checked above. NOTE(review): confirm against the kernel's contract.
                    if big_endian {
                        unsafe {
                            arch::neon::yuv_420p_n_to_rgb_u16_row::<10, true>(
                                y, u_half, v_half, rgb_out, width, matrix, full_range,
                            );
                        }
                    } else {
                        unsafe {
                            arch::neon::yuv_420p_n_to_rgb_u16_row::<10, false>(
                                y, u_half, v_half, rgb_out, width, matrix, full_range,
                            );
                        }
                    }
                    return;
                }
            },
            target_arch = "x86_64" => {
                // Widest vector tier first; each tier returns once it has handled the row.
                if avx512_available() {
                    // SAFETY (both calls): `avx512_available()` returned true and all rows were
                    // length-checked above. NOTE(review): confirm against the kernel's contract.
                    if big_endian {
                        unsafe {
                            arch::x86_avx512::yuv_420p_n_to_rgb_u16_row::<10, true>(
                                y, u_half, v_half, rgb_out, width, matrix, full_range,
                            );
                        }
                    } else {
                        unsafe {
                            arch::x86_avx512::yuv_420p_n_to_rgb_u16_row::<10, false>(
                                y, u_half, v_half, rgb_out, width, matrix, full_range,
                            );
                        }
                    }
                    return;
                }
                if avx2_available() {
                    // SAFETY (both calls): `avx2_available()` returned true and all rows were
                    // length-checked above. NOTE(review): confirm against the kernel's contract.
                    if big_endian {
                        unsafe {
                            arch::x86_avx2::yuv_420p_n_to_rgb_u16_row::<10, true>(
                                y, u_half, v_half, rgb_out, width, matrix, full_range,
                            );
                        }
                    } else {
                        unsafe {
                            arch::x86_avx2::yuv_420p_n_to_rgb_u16_row::<10, false>(
                                y, u_half, v_half, rgb_out, width, matrix, full_range,
                            );
                        }
                    }
                    return;
                }
                if sse41_available() {
                    // SAFETY (both calls): `sse41_available()` returned true and all rows were
                    // length-checked above. NOTE(review): confirm against the kernel's contract.
                    if big_endian {
                        unsafe {
                            arch::x86_sse41::yuv_420p_n_to_rgb_u16_row::<10, true>(
                                y, u_half, v_half, rgb_out, width, matrix, full_range,
                            );
                        }
                    } else {
                        unsafe {
                            arch::x86_sse41::yuv_420p_n_to_rgb_u16_row::<10, false>(
                                y, u_half, v_half, rgb_out, width, matrix, full_range,
                            );
                        }
                    }
                    return;
                }
            },
            target_arch = "wasm32" => {
                if simd128_available() {
                    // SAFETY (both calls): `simd128_available()` returned true and all rows were
                    // length-checked above. NOTE(review): confirm against the kernel's contract.
                    if big_endian {
                        unsafe {
                            arch::wasm_simd128::yuv_420p_n_to_rgb_u16_row::<10, true>(
                                y, u_half, v_half, rgb_out, width, matrix, full_range,
                            );
                        }
                    } else {
                        unsafe {
                            arch::wasm_simd128::yuv_420p_n_to_rgb_u16_row::<10, false>(
                                y, u_half, v_half, rgb_out, width, matrix, full_range,
                            );
                        }
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // Scalar fallback: SIMD was not requested or no backend was available.
    if big_endian {
        scalar::yuv_420p_n_to_rgb_u16_row::<10, true>(
            y, u_half, v_half, rgb_out, width, matrix, full_range,
        );
    } else {
        scalar::yuv_420p_n_to_rgb_u16_row::<10, false>(
            y, u_half, v_half, rgb_out, width, matrix, full_range,
        );
    }
}
/// Little-endian shorthand for [`yuv420p10_to_rgb_u16_row_endian`].
///
/// Forwards every argument unchanged and passes `false` for `big_endian`,
/// so the same length and even-width preconditions (and panics) apply.
#[cfg_attr(not(tarpaulin), inline(always))]
#[allow(clippy::too_many_arguments)]
pub fn yuv420p10_to_rgb_u16_row(
    y: &[u16],
    u_half: &[u16],
    v_half: &[u16],
    rgb_out: &mut [u16],
    width: usize,
    matrix: ColorMatrix,
    full_range: bool,
    use_simd: bool,
) {
    let big_endian = false;
    yuv420p10_to_rgb_u16_row_endian(
        y,
        u_half,
        v_half,
        rgb_out,
        width,
        matrix,
        full_range,
        use_simd,
        big_endian,
    );
}
/// Converts one row of 10-bit planar YUV 4:2:0 samples into `u8` RGBA output,
/// with the byte order of the input samples chosen at run time.
///
/// `big_endian` picks which kernel instantiation runs: `true` selects the
/// `<10, true>` variant, `false` the `<10, false>` variant.
///
/// When `use_simd` is set, the first available backend for the compile
/// target handles the row:
///
/// * `aarch64`: NEON
/// * `x86_64`: AVX-512, then AVX2, then SSE4.1
/// * `wasm32`: simd128
///
/// If `use_simd` is false, or no backend reports itself available, the
/// scalar kernel is used.
///
/// # Panics
///
/// Panics if `width` is odd, if `y` holds fewer than `width` samples, if
/// `u_half` or `v_half` holds fewer than `width / 2` samples, or if
/// `rgba_out` is shorter than `rgba_row_bytes(width)`.
#[cfg_attr(not(tarpaulin), inline(always))]
#[allow(clippy::too_many_arguments)]
pub fn yuv420p10_to_rgba_row_endian(
    y: &[u16],
    u_half: &[u16],
    v_half: &[u16],
    rgba_out: &mut [u8],
    width: usize,
    matrix: ColorMatrix,
    full_range: bool,
    use_simd: bool,
    big_endian: bool,
) {
    assert_eq!(width & 1, 0, "YUV 4:2:0 requires even width");
    let min_out_len = rgba_row_bytes(width);
    // Each chroma sample is shared by two horizontally adjacent pixels.
    let chroma_len = width / 2;
    assert!(y.len() >= width, "y row too short");
    assert!(u_half.len() >= chroma_len, "u_half row too short");
    assert!(v_half.len() >= chroma_len, "v_half row too short");
    assert!(rgba_out.len() >= min_out_len, "rgba_out row too short");

    if use_simd {
        cfg_select! {
            target_arch = "aarch64" => {
                if neon_available() {
                    // SAFETY (both calls): `neon_available()` returned true and all rows were
                    // length-checked above. NOTE(review): confirm against the kernel's contract.
                    if big_endian {
                        unsafe {
                            arch::neon::yuv_420p_n_to_rgba_row::<10, true>(
                                y, u_half, v_half, rgba_out, width, matrix, full_range,
                            );
                        }
                    } else {
                        unsafe {
                            arch::neon::yuv_420p_n_to_rgba_row::<10, false>(
                                y, u_half, v_half, rgba_out, width, matrix, full_range,
                            );
                        }
                    }
                    return;
                }
            },
            target_arch = "x86_64" => {
                // Widest vector tier first; each tier returns once it has handled the row.
                if avx512_available() {
                    // SAFETY (both calls): `avx512_available()` returned true and all rows were
                    // length-checked above. NOTE(review): confirm against the kernel's contract.
                    if big_endian {
                        unsafe {
                            arch::x86_avx512::yuv_420p_n_to_rgba_row::<10, true>(
                                y, u_half, v_half, rgba_out, width, matrix, full_range,
                            );
                        }
                    } else {
                        unsafe {
                            arch::x86_avx512::yuv_420p_n_to_rgba_row::<10, false>(
                                y, u_half, v_half, rgba_out, width, matrix, full_range,
                            );
                        }
                    }
                    return;
                }
                if avx2_available() {
                    // SAFETY (both calls): `avx2_available()` returned true and all rows were
                    // length-checked above. NOTE(review): confirm against the kernel's contract.
                    if big_endian {
                        unsafe {
                            arch::x86_avx2::yuv_420p_n_to_rgba_row::<10, true>(
                                y, u_half, v_half, rgba_out, width, matrix, full_range,
                            );
                        }
                    } else {
                        unsafe {
                            arch::x86_avx2::yuv_420p_n_to_rgba_row::<10, false>(
                                y, u_half, v_half, rgba_out, width, matrix, full_range,
                            );
                        }
                    }
                    return;
                }
                if sse41_available() {
                    // SAFETY (both calls): `sse41_available()` returned true and all rows were
                    // length-checked above. NOTE(review): confirm against the kernel's contract.
                    if big_endian {
                        unsafe {
                            arch::x86_sse41::yuv_420p_n_to_rgba_row::<10, true>(
                                y, u_half, v_half, rgba_out, width, matrix, full_range,
                            );
                        }
                    } else {
                        unsafe {
                            arch::x86_sse41::yuv_420p_n_to_rgba_row::<10, false>(
                                y, u_half, v_half, rgba_out, width, matrix, full_range,
                            );
                        }
                    }
                    return;
                }
            },
            target_arch = "wasm32" => {
                if simd128_available() {
                    // SAFETY (both calls): `simd128_available()` returned true and all rows were
                    // length-checked above. NOTE(review): confirm against the kernel's contract.
                    if big_endian {
                        unsafe {
                            arch::wasm_simd128::yuv_420p_n_to_rgba_row::<10, true>(
                                y, u_half, v_half, rgba_out, width, matrix, full_range,
                            );
                        }
                    } else {
                        unsafe {
                            arch::wasm_simd128::yuv_420p_n_to_rgba_row::<10, false>(
                                y, u_half, v_half, rgba_out, width, matrix, full_range,
                            );
                        }
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // Scalar fallback: SIMD was not requested or no backend was available.
    if big_endian {
        scalar::yuv_420p_n_to_rgba_row::<10, true>(
            y, u_half, v_half, rgba_out, width, matrix, full_range,
        );
    } else {
        scalar::yuv_420p_n_to_rgba_row::<10, false>(
            y, u_half, v_half, rgba_out, width, matrix, full_range,
        );
    }
}
/// Little-endian shorthand for [`yuv420p10_to_rgba_row_endian`].
///
/// Forwards every argument unchanged and passes `false` for `big_endian`,
/// so the same length and even-width preconditions (and panics) apply.
#[cfg_attr(not(tarpaulin), inline(always))]
#[allow(clippy::too_many_arguments)]
pub fn yuv420p10_to_rgba_row(
    y: &[u16],
    u_half: &[u16],
    v_half: &[u16],
    rgba_out: &mut [u8],
    width: usize,
    matrix: ColorMatrix,
    full_range: bool,
    use_simd: bool,
) {
    let big_endian = false;
    yuv420p10_to_rgba_row_endian(
        y,
        u_half,
        v_half,
        rgba_out,
        width,
        matrix,
        full_range,
        use_simd,
        big_endian,
    );
}
/// Converts one row of 10-bit planar YUV 4:2:0 samples into `u16` RGBA
/// output, with the byte order of the input samples chosen at run time.
///
/// `big_endian` picks which kernel instantiation runs: `true` selects the
/// `<10, true>` variant, `false` the `<10, false>` variant.
///
/// When `use_simd` is set, the first available backend for the compile
/// target handles the row:
///
/// * `aarch64`: NEON
/// * `x86_64`: AVX-512, then AVX2, then SSE4.1
/// * `wasm32`: simd128
///
/// If `use_simd` is false, or no backend reports itself available, the
/// scalar kernel is used.
///
/// # Panics
///
/// Panics if `width` is odd, if `y` holds fewer than `width` samples, if
/// `u_half` or `v_half` holds fewer than `width / 2` samples, or if
/// `rgba_out` holds fewer than `rgba_row_elems(width)` elements.
#[cfg_attr(not(tarpaulin), inline(always))]
#[allow(clippy::too_many_arguments)]
pub fn yuv420p10_to_rgba_u16_row_endian(
    y: &[u16],
    u_half: &[u16],
    v_half: &[u16],
    rgba_out: &mut [u16],
    width: usize,
    matrix: ColorMatrix,
    full_range: bool,
    use_simd: bool,
    big_endian: bool,
) {
    assert_eq!(width & 1, 0, "YUV 4:2:0 requires even width");
    let min_out_len = rgba_row_elems(width);
    // Each chroma sample is shared by two horizontally adjacent pixels.
    let chroma_len = width / 2;
    assert!(y.len() >= width, "y row too short");
    assert!(u_half.len() >= chroma_len, "u_half row too short");
    assert!(v_half.len() >= chroma_len, "v_half row too short");
    assert!(rgba_out.len() >= min_out_len, "rgba_out row too short");

    if use_simd {
        cfg_select! {
            target_arch = "aarch64" => {
                if neon_available() {
                    // SAFETY (both calls): `neon_available()` returned true and all rows were
                    // length-checked above. NOTE(review): confirm against the kernel's contract.
                    if big_endian {
                        unsafe {
                            arch::neon::yuv_420p_n_to_rgba_u16_row::<10, true>(
                                y, u_half, v_half, rgba_out, width, matrix, full_range,
                            );
                        }
                    } else {
                        unsafe {
                            arch::neon::yuv_420p_n_to_rgba_u16_row::<10, false>(
                                y, u_half, v_half, rgba_out, width, matrix, full_range,
                            );
                        }
                    }
                    return;
                }
            },
            target_arch = "x86_64" => {
                // Widest vector tier first; each tier returns once it has handled the row.
                if avx512_available() {
                    // SAFETY (both calls): `avx512_available()` returned true and all rows were
                    // length-checked above. NOTE(review): confirm against the kernel's contract.
                    if big_endian {
                        unsafe {
                            arch::x86_avx512::yuv_420p_n_to_rgba_u16_row::<10, true>(
                                y, u_half, v_half, rgba_out, width, matrix, full_range,
                            );
                        }
                    } else {
                        unsafe {
                            arch::x86_avx512::yuv_420p_n_to_rgba_u16_row::<10, false>(
                                y, u_half, v_half, rgba_out, width, matrix, full_range,
                            );
                        }
                    }
                    return;
                }
                if avx2_available() {
                    // SAFETY (both calls): `avx2_available()` returned true and all rows were
                    // length-checked above. NOTE(review): confirm against the kernel's contract.
                    if big_endian {
                        unsafe {
                            arch::x86_avx2::yuv_420p_n_to_rgba_u16_row::<10, true>(
                                y, u_half, v_half, rgba_out, width, matrix, full_range,
                            );
                        }
                    } else {
                        unsafe {
                            arch::x86_avx2::yuv_420p_n_to_rgba_u16_row::<10, false>(
                                y, u_half, v_half, rgba_out, width, matrix, full_range,
                            );
                        }
                    }
                    return;
                }
                if sse41_available() {
                    // SAFETY (both calls): `sse41_available()` returned true and all rows were
                    // length-checked above. NOTE(review): confirm against the kernel's contract.
                    if big_endian {
                        unsafe {
                            arch::x86_sse41::yuv_420p_n_to_rgba_u16_row::<10, true>(
                                y, u_half, v_half, rgba_out, width, matrix, full_range,
                            );
                        }
                    } else {
                        unsafe {
                            arch::x86_sse41::yuv_420p_n_to_rgba_u16_row::<10, false>(
                                y, u_half, v_half, rgba_out, width, matrix, full_range,
                            );
                        }
                    }
                    return;
                }
            },
            target_arch = "wasm32" => {
                if simd128_available() {
                    // SAFETY (both calls): `simd128_available()` returned true and all rows were
                    // length-checked above. NOTE(review): confirm against the kernel's contract.
                    if big_endian {
                        unsafe {
                            arch::wasm_simd128::yuv_420p_n_to_rgba_u16_row::<10, true>(
                                y, u_half, v_half, rgba_out, width, matrix, full_range,
                            );
                        }
                    } else {
                        unsafe {
                            arch::wasm_simd128::yuv_420p_n_to_rgba_u16_row::<10, false>(
                                y, u_half, v_half, rgba_out, width, matrix, full_range,
                            );
                        }
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // Scalar fallback: SIMD was not requested or no backend was available.
    if big_endian {
        scalar::yuv_420p_n_to_rgba_u16_row::<10, true>(
            y, u_half, v_half, rgba_out, width, matrix, full_range,
        );
    } else {
        scalar::yuv_420p_n_to_rgba_u16_row::<10, false>(
            y, u_half, v_half, rgba_out, width, matrix, full_range,
        );
    }
}
/// Little-endian shorthand for [`yuv420p10_to_rgba_u16_row_endian`].
///
/// Forwards every argument unchanged and passes `false` for `big_endian`,
/// so the same length and even-width preconditions (and panics) apply.
#[cfg_attr(not(tarpaulin), inline(always))]
#[allow(clippy::too_many_arguments)]
pub fn yuv420p10_to_rgba_u16_row(
    y: &[u16],
    u_half: &[u16],
    v_half: &[u16],
    rgba_out: &mut [u16],
    width: usize,
    matrix: ColorMatrix,
    full_range: bool,
    use_simd: bool,
) {
    let big_endian = false;
    yuv420p10_to_rgba_u16_row_endian(
        y,
        u_half,
        v_half,
        rgba_out,
        width,
        matrix,
        full_range,
        use_simd,
        big_endian,
    );
}
/// Parity tests: feeding the same logical samples as little-endian words
/// (with `big_endian = false`) and as big-endian words (with
/// `big_endian = true`) must give identical output from every dispatcher,
/// on both the scalar and the SIMD path.
#[cfg(all(test, feature = "std"))]
mod be_parity_tests {
    use super::*;

    /// Re-encodes `intended` twice: once so each word's in-memory bytes are
    /// little-endian, once so they are big-endian. Each result is returned as
    /// host-order `u16`s holding those bytes, so on any host exactly one of
    /// the two vectors equals `intended` and the other is byte-swapped.
    fn split_le_be(intended: &[u16]) -> (std::vec::Vec<u16>, std::vec::Vec<u16>) {
        let le: std::vec::Vec<u16> = intended
            .iter()
            .map(|v| u16::from_ne_bytes(v.to_le_bytes()))
            .collect();
        let be: std::vec::Vec<u16> = intended
            .iter()
            .map(|v| u16::from_ne_bytes(v.to_be_bytes()))
            .collect();
        (le, be)
    }

    /// Deterministic luma samples derived from `seed`, masked to 10 bits.
    fn pseudo_y(width: usize, seed: u32) -> std::vec::Vec<u16> {
        (0..width)
            .map(|i| ((seed.wrapping_mul(i as u32 + 1).wrapping_add(0x55)) & 0x3FF) as u16)
            .collect()
    }

    /// Deterministic chroma samples derived from `seed`, masked to 10 bits.
    fn pseudo_uv(half: usize, seed: u32) -> std::vec::Vec<u16> {
        (0..half)
            .map(|i| ((seed.wrapping_mul(i as u32 + 7).wrapping_add(0x123)) & 0x3FF) as u16)
            .collect()
    }

    #[test]
    #[cfg_attr(
        miri,
        ignore = "SIMD-dispatched row kernels use intrinsics unsupported by Miri"
    )]
    fn yuv420p10_dispatch_be_le_parity_simd_and_scalar() {
        for w in [8usize, 16, 24] {
            let half = w / 2;
            let y_int = pseudo_y(w, 0xA17F);
            let u_int = pseudo_uv(half, 0xC0DE);
            let v_int = pseudo_uv(half, 0xBEEF);
            let (y_le, y_be) = split_le_be(&y_int);
            let (u_le, u_be) = split_le_be(&u_int);
            let (v_le, v_be) = split_le_be(&v_int);

            for &use_simd in &[false, true] {
                // u8 RGB.
                let mut out_le = std::vec![0u8; w * 3];
                let mut out_be = std::vec![0u8; w * 3];
                yuv420p10_to_rgb_row_endian(
                    &y_le,
                    &u_le,
                    &v_le,
                    &mut out_le,
                    w,
                    ColorMatrix::Bt709,
                    false,
                    use_simd,
                    false,
                );
                yuv420p10_to_rgb_row_endian(
                    &y_be,
                    &u_be,
                    &v_be,
                    &mut out_be,
                    w,
                    ColorMatrix::Bt709,
                    false,
                    use_simd,
                    true,
                );
                assert_eq!(
                    out_le, out_be,
                    "yuv420p10 rgb BE/LE parity (w={w}, simd={use_simd})"
                );

                // u16 RGB.
                let mut out_le16 = std::vec![0u16; w * 3];
                let mut out_be16 = std::vec![0u16; w * 3];
                yuv420p10_to_rgb_u16_row_endian(
                    &y_le,
                    &u_le,
                    &v_le,
                    &mut out_le16,
                    w,
                    ColorMatrix::Bt709,
                    false,
                    use_simd,
                    false,
                );
                yuv420p10_to_rgb_u16_row_endian(
                    &y_be,
                    &u_be,
                    &v_be,
                    &mut out_be16,
                    w,
                    ColorMatrix::Bt709,
                    false,
                    use_simd,
                    true,
                );
                assert_eq!(
                    out_le16, out_be16,
                    "yuv420p10 rgb_u16 BE/LE parity (w={w}, simd={use_simd})"
                );

                // u8 RGBA.
                let mut out_le4 = std::vec![0u8; w * 4];
                let mut out_be4 = std::vec![0u8; w * 4];
                yuv420p10_to_rgba_row_endian(
                    &y_le,
                    &u_le,
                    &v_le,
                    &mut out_le4,
                    w,
                    ColorMatrix::Bt709,
                    false,
                    use_simd,
                    false,
                );
                yuv420p10_to_rgba_row_endian(
                    &y_be,
                    &u_be,
                    &v_be,
                    &mut out_be4,
                    w,
                    ColorMatrix::Bt709,
                    false,
                    use_simd,
                    true,
                );
                assert_eq!(
                    out_le4, out_be4,
                    "yuv420p10 rgba BE/LE parity (w={w}, simd={use_simd})"
                );

                // u16 RGBA.
                let mut out_le4u = std::vec![0u16; w * 4];
                let mut out_be4u = std::vec![0u16; w * 4];
                yuv420p10_to_rgba_u16_row_endian(
                    &y_le,
                    &u_le,
                    &v_le,
                    &mut out_le4u,
                    w,
                    ColorMatrix::Bt709,
                    false,
                    use_simd,
                    false,
                );
                yuv420p10_to_rgba_u16_row_endian(
                    &y_be,
                    &u_be,
                    &v_be,
                    &mut out_be4u,
                    w,
                    ColorMatrix::Bt709,
                    false,
                    use_simd,
                    true,
                );
                assert_eq!(
                    out_le4u, out_be4u,
                    "yuv420p10 rgba_u16 BE/LE parity (w={w}, simd={use_simd})"
                );
            }
        }
    }
}