#[cfg(target_arch = "aarch64")]
use std::arch::aarch64;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64;
use crate::{
frame::{Frame, FrameMut},
ConversionError,
};
#[doc(hidden)]
pub trait RowProcessor: Copy + Clone + Sized + Send + Sync {
#[allow(clippy::too_many_arguments)]
unsafe fn process(
self,
width: usize,
top_uyvy_addr: *const u8,
bot_uyvy_addr: *const u8,
top_y_addr: *mut u8,
bot_y_addr: *mut u8,
u_addr: *mut u8,
v_addr: *mut u8,
);
}
/// Converts a UYVY 4:2:2 frame to I420, picking the best row processor
/// available on the running CPU.
///
/// Returns an error if no processor is available for this architecture or if
/// the frames' formats/dimensions are unsuitable (see `convert_with`).
#[allow(clippy::needless_return)] // the returns below are cfg-conditional.
pub fn convert<FI: Frame, FO: FrameMut>(
    uyvy_in: &FI,
    yuv_out: &mut FO,
) -> Result<(), ConversionError> {
    #[cfg(target_arch = "x86_64")]
    {
        // Prefer AVX2 when detected at runtime; otherwise fall back to SSE2,
        // which is part of the x86_64 baseline and always constructible.
        if let Ok(avx2) = ExplicitAvx2DoubleBlock::try_new() {
            return convert_with(avx2, uyvy_in, yuv_out);
        }
        return convert_with(ExplicitSse2::new(), uyvy_in, yuv_out);
    }
    #[cfg(target_arch = "aarch64")]
    return convert_with(ExplicitNeon::new(), uyvy_in, yuv_out);
    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
    Err(ConversionError("no block processor available"))
}
#[doc(hidden)]
pub fn convert_with<P: RowProcessor, FI: Frame, FO: FrameMut>(
p: P,
uyvy_in: &FI,
yuv_out: &mut FO,
) -> Result<(), ConversionError> {
let (width, height) = uyvy_in.pixel_dimensions();
if uyvy_in.format() != crate::PixelFormat::UYVY422
|| yuv_out.format() != crate::PixelFormat::I420
|| yuv_out.pixel_dimensions() != (width, height)
{
return Err(ConversionError("invalid arguments"));
}
let pixels = width * height;
let uyvy_planes = uyvy_in.planes();
let [uyvy_in] = &uyvy_planes[..] else {
panic!("uyvy_in must have one plane");
};
let mut yuv_planes = yuv_out.planes_mut();
let [y_out, u_out, v_out] = &mut yuv_planes[..] else {
panic!("yuv_out must have three planes");
};
let chroma_width = (width >> 1) + (width & 1);
let chroma_rows = (height >> 1) + (height & 1);
let chroma_size = chroma_width * chroma_rows;
if y_out.stride() != width || u_out.stride() != chroma_width || v_out.stride() != chroma_width {
return Err(ConversionError("output padding unsupported")); }
let uyvy_stride = uyvy_in.stride();
assert!(uyvy_stride >= width * 2);
assert!(uyvy_in.len() >= height * uyvy_stride - (uyvy_stride - width * 2));
let uyvy_in = uyvy_in.as_ptr();
assert_eq!(y_out.len(), pixels);
assert_eq!(u_out.len(), chroma_size);
assert_eq!(v_out.len(), chroma_size);
let y_out = y_out.as_mut_ptr();
let u_out = u_out.as_mut_ptr();
let v_out = v_out.as_mut_ptr();
let mut r = 0;
loop {
if r + 2 > height {
break;
}
unsafe {
p.process(
width,
uyvy_in.add(r * uyvy_stride),
uyvy_in.add((r + 1) * uyvy_stride),
y_out.add(r * width),
y_out.add((r + 1) * width),
u_out.add((r >> 1) * chroma_width),
v_out.add((r >> 1) * chroma_width),
);
}
r += 2;
}
if r < height {
unsafe {
p.process(
width,
uyvy_in.add(r * uyvy_stride),
uyvy_in.add(r * uyvy_stride), y_out.add(r * width),
y_out.add(r * width), u_out.add((r >> 1) * chroma_width),
v_out.add((r >> 1) * chroma_width),
);
}
}
drop(yuv_planes);
Ok(())
}
/// Debug helper: renders a 256-bit vector as two 128-bit hex halves
/// (most-significant half first, 32 hex digits each).
#[allow(dead_code)]
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn hexprint(v: std::arch::x86_64::__m256i) -> impl std::fmt::Display {
    use std::arch::x86_64::_mm256_extract_epi64;
    // Lane 0 is the least-significant 64 bits of the vector.
    let lanes = [
        _mm256_extract_epi64(v, 0) as u64,
        _mm256_extract_epi64(v, 1) as u64,
        _mm256_extract_epi64(v, 2) as u64,
        _mm256_extract_epi64(v, 3) as u64,
    ];
    let hi = (u128::from(lanes[3]) << 64) | u128::from(lanes[2]);
    let lo = (u128::from(lanes[1]) << 64) | u128::from(lanes[0]);
    format!("{hi:032x} {lo:032x}")
}
/// Scalar row-pair conversion, used by every SIMD kernel to finish the
/// pixels left over after its last full block.
///
/// # Safety
/// Luma outputs must be writable for `width` bytes and chroma outputs for
/// `ceil(width / 2)` bytes. The chroma loop reads whole 4-byte UYVY groups,
/// so for odd `width` each input row must be readable one byte past
/// `2 * width` — presumably guaranteed by the callers' stride padding
/// (TODO confirm for odd-width frames).
#[inline(never)]
unsafe fn fallback(
    width: usize,
    top_uyvy_addr: *const u8,
    bot_uyvy_addr: *const u8,
    top_y_addr: *mut u8,
    bot_y_addr: *mut u8,
    u_addr: *mut u8,
    v_addr: *mut u8,
) {
    // Luma: in UYVY the Y samples sit at odd byte offsets.
    for col in 0..width {
        let y_src = 2 * col + 1;
        top_y_addr.add(col).write(top_uyvy_addr.add(y_src).read());
        bot_y_addr.add(col).write(bot_uyvy_addr.add(y_src).read());
    }
    // Rounding average of two bytes: (a + b + 1) / 2.
    let avg = |a: u8, b: u8| -> u8 { ((u16::from(a) + u16::from(b) + 1) / 2) as u8 };
    // Chroma: U at offset 0 and V at offset 2 of each 4-byte group,
    // averaged vertically across the two rows.
    let chroma_width = width / 2 + width % 2;
    for col in 0..chroma_width {
        let base = 4 * col;
        let u = avg(top_uyvy_addr.add(base).read(), bot_uyvy_addr.add(base).read());
        let v = avg(
            top_uyvy_addr.add(base + 2).read(),
            bot_uyvy_addr.add(base + 2).read(),
        );
        u_addr.add(col).write(u);
        v_addr.add(col).write(v);
    }
}
#[cfg(target_arch = "x86_64")]
#[doc(hidden)]
#[derive(Copy, Clone)]
pub struct ExplicitAvx2DoubleBlock(());
#[cfg(target_arch = "x86_64")]
impl ExplicitAvx2DoubleBlock {
    /// Returns the processor when the running CPU reports AVX2 support.
    #[inline]
    pub fn try_new() -> Result<Self, ConversionError> {
        match is_x86_feature_detected!("avx2") {
            true => Ok(Self(())),
            false => Err(ConversionError("avx2 is not supported on this machine")),
        }
    }
}
#[cfg(target_arch = "x86_64")]
impl RowProcessor for ExplicitAvx2DoubleBlock {
    /// Processes 64 pixels (two 32-byte AVX2 loads per row) per iteration,
    /// handing any remainder to the scalar `fallback`.
    #[target_feature(enable = "avx2")]
    #[inline(never)]
    unsafe fn process(
        self,
        width: usize,
        mut top_uyvy_addr: *const u8,
        mut bot_uyvy_addr: *const u8,
        mut top_y_addr: *mut u8,
        mut bot_y_addr: *mut u8,
        mut u_addr: *mut u8,
        mut v_addr: *mut u8,
    ) {
        // Within each 16-byte UYVY lane: odd bytes are Y, bytes 0,4,8,12 are
        // U, bytes 2,6,10,14 are V. This shuffle regroups every lane as
        // [Y x8 | U x4 | V x4], broadcast to both 128-bit halves.
        let shuf_indices = x86_64::_mm256_broadcastsi128_si256(x86_64::_mm_setr_epi8(
            1, 3, 5, 7, 9, 11, 13, 15, 0, 4, 8, 12, 2, 6, 10, 14,
        ));
        let mut i = 0;
        const BLOCK_SIZE: usize = 64;
        while i + BLOCK_SIZE <= width {
            // Load 4 x 32 bytes of UYVY per row and regroup each lane.
            let load = |uyvy_addr: *const u8| -> [_; 4] {
                std::array::from_fn(|i| {
                    let raw = x86_64::_mm256_loadu_si256(uyvy_addr.add(32 * i) as _);
                    x86_64::_mm256_shuffle_epi8(raw, shuf_indices)
                })
            };
            let top = load(top_uyvy_addr);
            let bot = load(bot_uyvy_addr);
            for (data, addr) in [(top, top_y_addr), (bot, bot_y_addr)] {
                for i in [0, 1] {
                    // Gather the Y halves of two vectors, then permute the
                    // 64-bit quarters back into source pixel order.
                    let swapped = x86_64::_mm256_unpacklo_epi64(data[2 * i], data[2 * i + 1]);
                    x86_64::_mm256_storeu_si256(
                        addr.add(32 * i) as _,
                        x86_64::_mm256_permute4x64_epi64::<0b11_01_10_00>(swapped),
                    );
                }
            }
            // Vertically average the chroma halves (vpavgb rounds half up).
            let uv: [_; 2] = std::array::from_fn(|i| {
                x86_64::_mm256_avg_epu8(
                    x86_64::_mm256_unpackhi_epi32(top[2 * i], top[2 * i + 1]),
                    x86_64::_mm256_unpackhi_epi32(bot[2 * i], bot[2 * i + 1]),
                )
            });
            // Cross-lane fixup: recombine 128-bit halves so U and V come out
            // in source pixel order, then split them into the two planes.
            let mix = x86_64::_mm256_permute2x128_si256::<0b_00_11_00_01>(uv[0], uv[1]);
            let uv0prime =
                x86_64::_mm256_inserti128_si256::<1>(uv[0], x86_64::_mm256_castsi256_si128(uv[1]));
            x86_64::_mm256_storeu_si256(u_addr as _, x86_64::_mm256_unpacklo_epi32(uv0prime, mix));
            x86_64::_mm256_storeu_si256(v_addr as _, x86_64::_mm256_unpackhi_epi32(uv0prime, mix));
            i += BLOCK_SIZE;
            top_uyvy_addr = top_uyvy_addr.add(2 * BLOCK_SIZE);
            bot_uyvy_addr = bot_uyvy_addr.add(2 * BLOCK_SIZE);
            top_y_addr = top_y_addr.add(BLOCK_SIZE);
            bot_y_addr = bot_y_addr.add(BLOCK_SIZE);
            u_addr = u_addr.add(BLOCK_SIZE / 2);
            v_addr = v_addr.add(BLOCK_SIZE / 2);
        }
        // Scalar path for the sub-block remainder.
        if i < width {
            fallback(
                width - i,
                top_uyvy_addr,
                bot_uyvy_addr,
                top_y_addr,
                bot_y_addr,
                u_addr,
                v_addr,
            );
        }
    }
}
#[cfg(target_arch = "x86_64")]
#[doc(hidden)]
#[derive(Copy, Clone)]
pub struct ExplicitAvx2SingleBlock(());
#[cfg(target_arch = "x86_64")]
impl ExplicitAvx2SingleBlock {
    /// Returns the processor when the running CPU reports AVX2 support.
    #[inline]
    pub fn try_new() -> Result<Self, ConversionError> {
        match is_x86_feature_detected!("avx2") {
            true => Ok(Self(())),
            false => Err(ConversionError("avx2 is not supported on this machine")),
        }
    }
}
#[cfg(target_arch = "x86_64")]
impl RowProcessor for ExplicitAvx2SingleBlock {
    /// Processes 32 pixels (one 32-byte AVX2 load per row, twice) per
    /// iteration, handing any remainder to the scalar `fallback`.
    #[inline(never)]
    #[target_feature(enable = "avx2")]
    unsafe fn process(
        self,
        width: usize,
        mut top_uyvy_addr: *const u8,
        mut bot_uyvy_addr: *const u8,
        mut top_y_addr: *mut u8,
        mut bot_y_addr: *mut u8,
        mut u_addr: *mut u8,
        mut v_addr: *mut u8,
    ) {
        // Regroups each 16-byte UYVY lane as [Y x8 | U x4 | V x4]; see the
        // double-block processor for the byte-position breakdown.
        let shuf_indices = x86_64::_mm256_broadcastsi128_si256(x86_64::_mm_setr_epi8(
            1, 3, 5, 7, 9, 11, 13, 15, 0, 4, 8, 12, 2, 6, 10, 14,
        ));
        const BLOCK_SIZE: usize = 32;
        let mut i = 0;
        while i + BLOCK_SIZE <= width {
            let load = |uyvy_addr: *const u8| -> [_; 2] {
                std::array::from_fn(|i| {
                    let raw = x86_64::_mm256_loadu_si256(uyvy_addr.add(32 * i) as _);
                    x86_64::_mm256_shuffle_epi8(raw, shuf_indices)
                })
            };
            let top = load(top_uyvy_addr);
            let bot = load(bot_uyvy_addr);
            for (data, y_addr) in [(top, top_y_addr), (bot, bot_y_addr)] {
                // Gather Y halves, then restore source pixel order across
                // the 64-bit quarters before storing the luma row.
                let y = x86_64::_mm256_unpacklo_epi64(data[0], data[1]);
                x86_64::_mm256_storeu_si256(
                    y_addr as _,
                    x86_64::_mm256_permute4x64_epi64::<0b11_01_10_00>(y),
                );
            }
            // Vertical chroma average (vpavgb rounds half up).
            let uv = x86_64::_mm256_avg_epu8(
                x86_64::_mm256_unpackhi_epi32(top[0], top[1]),
                x86_64::_mm256_unpackhi_epi32(bot[0], bot[1]),
            );
            // Cross-lane permute so the low 128 bits hold U in pixel order
            // and the high 128 bits hold V; store each half to its plane.
            let p = x86_64::_mm256_setr_epi32(0, 4, 1, 5, 2, 6, 3, 7);
            x86_64::_mm256_storeu2_m128i(
                v_addr as _,
                u_addr as _,
                x86_64::_mm256_permutevar8x32_epi32(uv, p),
            );
            i += BLOCK_SIZE;
            top_uyvy_addr = top_uyvy_addr.add(2 * BLOCK_SIZE);
            bot_uyvy_addr = bot_uyvy_addr.add(2 * BLOCK_SIZE);
            top_y_addr = top_y_addr.add(BLOCK_SIZE);
            bot_y_addr = bot_y_addr.add(BLOCK_SIZE);
            u_addr = u_addr.add(BLOCK_SIZE / 2);
            v_addr = v_addr.add(BLOCK_SIZE / 2);
        }
        // Scalar path for the sub-block remainder.
        if i < width {
            fallback(
                width - i,
                top_uyvy_addr,
                bot_uyvy_addr,
                top_y_addr,
                bot_y_addr,
                u_addr,
                v_addr,
            );
        }
    }
}
#[cfg(target_arch = "x86_64")]
#[doc(hidden)]
#[derive(Copy, Clone, Default)]
pub struct ExplicitSse2(());
#[cfg(target_arch = "x86_64")]
impl ExplicitSse2 {
    /// Creates the SSE2 processor; SSE2 is part of the x86_64 baseline, so
    /// no runtime detection is needed.
    #[inline]
    pub fn new() -> Self {
        ExplicitSse2(())
    }
    /// Always succeeds; provided for API parity with the AVX2 processors.
    #[inline]
    pub fn try_new() -> Result<Self, ConversionError> {
        Ok(ExplicitSse2(()))
    }
}
#[cfg(target_arch = "x86_64")]
impl RowProcessor for ExplicitSse2 {
    /// Processes 32 pixels (four 16-byte SSE2 loads per row) per iteration,
    /// handing any remainder to the scalar `fallback`.
    unsafe fn process(
        self,
        width: usize,
        mut top_uyvy_addr: *const u8,
        mut bot_uyvy_addr: *const u8,
        mut top_y_addr: *mut u8,
        mut bot_y_addr: *mut u8,
        mut u_addr: *mut u8,
        mut v_addr: *mut u8,
    ) {
        let mut i = 0;
        const BLOCK_SIZE: usize = 32;
        // Mask selecting the low byte of each 16-bit lane. In little-endian
        // UYVY, each 16-bit lane is (Y << 8) | chroma, so the low byte is
        // U/V (alternating) and the high byte is Y.
        let low_bits = x86_64::_mm_set1_epi16(0xFF);
        while i + BLOCK_SIZE <= width {
            let load = |uyvy_addr: *const u8| -> [_; 4] {
                std::array::from_fn(|i| x86_64::_mm_loadu_si128(uyvy_addr.add(16 * i) as _))
            };
            let top_uyvy = load(top_uyvy_addr);
            let bot_uyvy = load(bot_uyvy_addr);
            // Luma: shift each lane's Y byte down, then pack pairs of
            // vectors back to bytes.
            for (uyvy, y_addr) in [(top_uyvy, top_y_addr), (bot_uyvy, bot_y_addr)] {
                x86_64::_mm_storeu_si128(
                    y_addr as _,
                    x86_64::_mm_packus_epi16(
                        x86_64::_mm_srli_epi16(uyvy[0], 8),
                        x86_64::_mm_srli_epi16(uyvy[1], 8),
                    ),
                );
                x86_64::_mm_storeu_si128(
                    y_addr.add(16) as _,
                    x86_64::_mm_packus_epi16(
                        x86_64::_mm_srli_epi16(uyvy[2], 8),
                        x86_64::_mm_srli_epi16(uyvy[3], 8),
                    ),
                );
            }
            // Chroma: mask + pack yields bytes interleaved as U,V,U,V,...
            let uv = |uyvy: [x86_64::__m128i; 4]| {
                [
                    x86_64::_mm_packus_epi16(
                        x86_64::_mm_and_si128(uyvy[0], low_bits),
                        x86_64::_mm_and_si128(uyvy[1], low_bits),
                    ),
                    x86_64::_mm_packus_epi16(
                        x86_64::_mm_and_si128(uyvy[2], low_bits),
                        x86_64::_mm_and_si128(uyvy[3], low_bits),
                    ),
                ]
            };
            let top_uv = uv(top_uyvy);
            let bot_uv = uv(bot_uyvy);
            // Vertical chroma average (pavgb rounds half up).
            let uv = [
                x86_64::_mm_avg_epu8(top_uv[0], bot_uv[0]),
                x86_64::_mm_avg_epu8(top_uv[1], bot_uv[1]),
            ];
            // De-interleave the averaged U,V pairs: even bytes (mask + pack)
            // are U, odd bytes (shift + pack) are V.
            let u = x86_64::_mm_packus_epi16(
                x86_64::_mm_and_si128(uv[0], low_bits),
                x86_64::_mm_and_si128(uv[1], low_bits),
            );
            x86_64::_mm_storeu_si128(u_addr as _, u);
            let v = x86_64::_mm_packus_epi16(
                x86_64::_mm_srli_epi16(uv[0], 8),
                x86_64::_mm_srli_epi16(uv[1], 8),
            );
            x86_64::_mm_storeu_si128(v_addr as _, v);
            i += BLOCK_SIZE;
            top_uyvy_addr = top_uyvy_addr.add(2 * BLOCK_SIZE);
            bot_uyvy_addr = bot_uyvy_addr.add(2 * BLOCK_SIZE);
            top_y_addr = top_y_addr.add(BLOCK_SIZE);
            bot_y_addr = bot_y_addr.add(BLOCK_SIZE);
            u_addr = u_addr.add(BLOCK_SIZE / 2);
            v_addr = v_addr.add(BLOCK_SIZE / 2);
        }
        // Scalar path for the sub-block remainder.
        if i < width {
            fallback(
                width - i,
                top_uyvy_addr,
                bot_uyvy_addr,
                top_y_addr,
                bot_y_addr,
                u_addr,
                v_addr,
            );
        }
    }
}
#[cfg(target_arch = "aarch64")]
#[doc(hidden)]
#[derive(Copy, Clone, Default)]
pub struct ExplicitNeon(());
#[cfg(target_arch = "aarch64")]
impl ExplicitNeon {
    /// Creates the NEON processor; NEON is mandatory on aarch64.
    fn new() -> Self {
        ExplicitNeon(())
    }
    /// Always succeeds; provided for API parity with the x86_64 processors.
    #[inline]
    pub fn try_new() -> Result<Self, ConversionError> {
        Ok(ExplicitNeon(()))
    }
}
#[cfg(target_arch = "aarch64")]
impl RowProcessor for ExplicitNeon {
    /// Processes 32 pixels per iteration using NEON de-interleaving loads,
    /// handing any remainder to the scalar `fallback`.
    #[inline(never)]
    #[target_feature(enable = "neon")]
    unsafe fn process(
        self,
        width: usize,
        top_uyvy_addr: *const u8,
        bot_uyvy_addr: *const u8,
        top_y_addr: *mut u8,
        bot_y_addr: *mut u8,
        u_addr: *mut u8,
        v_addr: *mut u8,
    ) {
        const BLOCK_SIZE: usize = 32;
        let mut i = 0;
        while i + BLOCK_SIZE <= width {
            // vld4q_u8 de-interleaves each 4-byte UYVY group:
            // .0 = U, .1 = Y0, .2 = V, .3 = Y1.
            let top_uyvy = aarch64::vld4q_u8(top_uyvy_addr.add(2 * i));
            let bot_uyvy = aarch64::vld4q_u8(bot_uyvy_addr.add(2 * i));
            // vst2q_u8 re-interleaves Y0/Y1 into 32 contiguous luma bytes.
            aarch64::vst2q_u8(
                top_y_addr.add(i),
                aarch64::uint8x16x2_t(top_uyvy.1, top_uyvy.3),
            );
            aarch64::vst2q_u8(
                bot_y_addr.add(i),
                aarch64::uint8x16x2_t(bot_uyvy.1, bot_uyvy.3),
            );
            // vrhaddq_u8 is a rounding halving add: the vertical chroma
            // average with round-half-up, matching `fallback`.
            aarch64::vst1q_u8(
                u_addr.add(i / 2),
                aarch64::vrhaddq_u8(top_uyvy.0, bot_uyvy.0),
            );
            aarch64::vst1q_u8(
                v_addr.add(i / 2),
                aarch64::vrhaddq_u8(top_uyvy.2, bot_uyvy.2),
            );
            i += BLOCK_SIZE;
        }
        // Scalar path for the sub-block remainder.
        if i < width {
            fallback(
                width - i,
                top_uyvy_addr.add(2 * i),
                bot_uyvy_addr.add(2 * i),
                top_y_addr.add(i),
                bot_y_addr.add(i),
                u_addr.add(i / 2),
                v_addr.add(i / 2),
            );
        }
    }
}
// Generates a `RowProcessor` whose inner loop is plain scalar code over a
// const-generic block of PIXELS pixels, relying on the compiler to
// autovectorize it under the given target features. `supported` is a runtime
// feature-detection expression; `features` is the matching list of
// `target_feature` names to enable on `process`.
macro_rules! auto {
{$ident:ident supported { $($supported:tt)+ } features { $($feature:literal)* }} => {
    #[doc(hidden)]
    #[derive(Copy, Clone)]
    pub struct $ident<const PIXELS: usize>([(); PIXELS]);
    impl<const PIXELS: usize> $ident<PIXELS> {
        #[inline(always)]
        pub fn try_new() -> Result<Self, ConversionError> {
            // `true &&` lets `$supported` expand to any expression tokens
            // without precedence surprises.
            if true && $($supported)+ {
                Ok(Self(std::array::from_fn(|_| ())))
            } else {
                Err(ConversionError(concat!(stringify!($ident), " unsupported on this machine")))
            }
        }
    }
    impl<const PIXELS: usize> RowProcessor for $ident<PIXELS> {
        #[inline(never)]
        $(#[target_feature(enable = $feature)])*
        unsafe fn process(
            self,
            width: usize,
            top_uyvy_addr: *const u8,
            bot_uyvy_addr: *const u8,
            top_y_addr: *mut u8,
            bot_y_addr: *mut u8,
            u_addr: *mut u8,
            v_addr: *mut u8,
        ) {
            let mut i = 0;
            // An odd PIXELS would misalign the chroma loop below.
            const { assert!(PIXELS % 2 == 0); }
            loop {
                if i + PIXELS > width {
                    break;
                }
                let top_uyvy_addr = top_uyvy_addr.add(2 * i);
                let bot_uyvy_addr = bot_uyvy_addr.add(2 * i);
                let top_y_addr = top_y_addr.add(i);
                let bot_y_addr = bot_y_addr.add(i);
                let u_addr = u_addr.add(i / 2);
                let v_addr = v_addr.add(i / 2);
                // Luma: Y samples sit at odd byte offsets in UYVY.
                for j in 0..PIXELS {
                    std::ptr::write(top_y_addr.add(j), std::ptr::read(top_uyvy_addr.add(2*j + 1)));
                    std::ptr::write(bot_y_addr.add(j), std::ptr::read(bot_uyvy_addr.add(2*j + 1)));
                }
                // NOTE: `+` binds tighter than `>>`, so this is
                // (a + b + 1) >> 1 — the same round-half-up as `fallback`.
                let avg = |a: u8, b: u8| { (u16::from(a) + u16::from(b) + 1 >> 1) as u8 };
                // Chroma: U at offset 0 and V at offset 2 of each 4-byte
                // group, averaged vertically.
                for j in 0..PIXELS/2 {
                    let top_u = std::ptr::read(top_uyvy_addr.add(4*j));
                    let bot_u = std::ptr::read(bot_uyvy_addr.add(4*j));
                    let top_v = std::ptr::read(top_uyvy_addr.add(4*j + 2));
                    let bot_v = std::ptr::read(bot_uyvy_addr.add(4*j + 2));
                    std::ptr::write(u_addr.add(j), avg(top_u, bot_u));
                    std::ptr::write(v_addr.add(j), avg(top_v, bot_v));
                }
                i += PIXELS;
            }
            // Scalar path for the sub-block remainder.
            if i < width {
                fallback(
                    width - i,
                    top_uyvy_addr.add(2 * i),
                    bot_uyvy_addr.add(2 * i),
                    top_y_addr.add(i),
                    bot_y_addr.add(i),
                    u_addr.add(i / 2),
                    v_addr.add(i / 2),
                );
            }
        }
    }
}
}
// Autovectorized processor relying on AVX2 codegen.
#[cfg(target_arch = "x86_64")]
auto! {
    AutoAvx2Block
    supported { is_x86_feature_detected!("avx2") }
    features { "avx2" }
}
// Autovectorized processor relying on NEON codegen.
#[cfg(target_arch = "aarch64")]
auto! {
    AutoNeonBlock
    supported { std::arch::is_aarch64_feature_detected!("neon") }
    features { "neon" }
}
// Portable autovectorized processor with no target-feature requirements.
auto! {
    AutoVanillaBlock
    supported { true }
    features { }
}
#[cfg(test)]
mod tests {
    // Instantiates the standard test suite for one processor type:
    // `$pixels` is the processor's block size, so each test covers exactly
    // one full SIMD block.
    macro_rules! test_processor {
        ($processor: ty, $mod: ident, $pixels: expr) => {
            mod $mod {
                use super::super::RowProcessor as _;
                type P = $processor;
                // Luma path: distinct Y values, chroma bytes all 0xff.
                #[test]
                fn y() {
                    let p = P::try_new().unwrap();
                    const PIXELS: usize = $pixels;
                    let mut top_in = vec![0xff; PIXELS * 4];
                    let mut bot_in = vec![0xff; PIXELS * 4];
                    for i in 0..PIXELS {
                        top_in[2 * i + 1] = i as u8;
                        bot_in[2 * i + 1] = !(i as u8);
                    }
                    let mut top_y_out = vec![0xff; PIXELS];
                    let mut bot_y_out = vec![0xff; PIXELS];
                    let mut u_out = vec![0xff; PIXELS / 2];
                    let mut v_out = vec![0xff; PIXELS / 2];
                    unsafe {
                        p.process(
                            PIXELS,
                            top_in.as_ptr(),
                            bot_in.as_ptr(),
                            top_y_out.as_mut_ptr(),
                            bot_y_out.as_mut_ptr(),
                            u_out.as_mut_ptr(),
                            v_out.as_mut_ptr(),
                        )
                    }
                    let expected_top_y_out: [u8; PIXELS] = std::array::from_fn(|i| i as u8);
                    let expected_bot_y_out: [u8; PIXELS] = std::array::from_fn(|i| !(i as u8));
                    assert_eq!(&top_y_out[..], &expected_top_y_out[..]);
                    assert_eq!(&bot_y_out[..], &expected_bot_y_out[..]);
                }
                // Chroma path: verifies the vertical average of U and V
                // (e.g. avg(0x10 + i, 0x30 + i) == 0x20 + i).
                #[test]
                fn uv() {
                    let p = P::try_new().unwrap();
                    const PIXELS: usize = $pixels;
                    let mut top_in = vec![0xff; PIXELS * 4];
                    let mut bot_in = vec![0xff; PIXELS * 4];
                    for i in 0..PIXELS {
                        top_in[4 * i] = 0x10 + i as u8;
                        bot_in[4 * i] = 0x30 + i as u8;
                        top_in[4 * i + 2] = 0x80 + i as u8;
                        bot_in[4 * i + 2] = 0xa0 + i as u8;
                    }
                    let mut top_y_out = vec![0xff; PIXELS];
                    let mut bot_y_out = vec![0xff; PIXELS];
                    let mut u_out = vec![0xff; PIXELS / 2];
                    let mut v_out = vec![0xff; PIXELS / 2];
                    unsafe {
                        p.process(
                            PIXELS,
                            top_in.as_ptr(),
                            bot_in.as_ptr(),
                            top_y_out.as_mut_ptr(),
                            bot_y_out.as_mut_ptr(),
                            u_out.as_mut_ptr(),
                            v_out.as_mut_ptr(),
                        )
                    }
                    let expected_u_out: [u8; PIXELS / 2] = std::array::from_fn(|i| 0x20 + i as u8);
                    let expected_v_out: [u8; PIXELS / 2] = std::array::from_fn(|i| 0x90 + i as u8);
                    assert_eq!(&u_out[..], &expected_u_out[..]);
                    assert_eq!(&v_out[..], &expected_v_out[..]);
                }
                // End-to-end golden test against checked-in fixture frames.
                // Skipped under miri: too slow for interpretation.
                #[cfg(not(miri))] #[test]
                fn full_frame() {
                    use crate::{
                        frame::{ConsecutiveFrame, Frame as _},
                        PixelFormat,
                    };
                    const WIDTH: usize = 1280;
                    const HEIGHT: usize = 720;
                    let uyvy_in = ConsecutiveFrame::new(PixelFormat::UYVY422, WIDTH, HEIGHT)
                        .with_storage(&include_bytes!("testdata/in.yuv")[..]);
                    let mut actual_out =
                        ConsecutiveFrame::new(PixelFormat::I420, WIDTH, HEIGHT).new_vec();
                    let expected_out = ConsecutiveFrame::new(PixelFormat::I420, WIDTH, HEIGHT)
                        .with_storage(&include_bytes!("testdata/out.yuv")[..]);
                    super::super::convert_with(P::try_new().unwrap(), &uyvy_in, &mut actual_out).unwrap();
                    assert!(expected_out.planes() == actual_out.planes());
                }
                // Odd width and height: exercises the scalar tail and the
                // duplicated-final-row chroma handling.
                #[test]
                #[rustfmt::skip]
                fn size3x3() {
                    use crate::{frame::ConsecutiveFrame, PixelFormat};
                    let uyvy_in = ConsecutiveFrame::new(PixelFormat::UYVY422, 3, 3).with_storage(&[
                        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, ][..]);
                    let expected_out = ConsecutiveFrame::new(PixelFormat::I420, 3, 3).with_storage(&[
                        0x01, 0x03, 0x05, 0x0a, 0x0c, 0x0e, 0x13, 0x15, 0x17,
                        0x05, 0x09, 0x12, 0x16,
                        0x07, 0x0b, 0x14, 0x18, ][..]);
                    let mut actual_out =
                        ConsecutiveFrame::new(PixelFormat::I420, 3, 3).new_vec();
                    super::super::convert_with(P::try_new().unwrap(), &uyvy_in, &mut actual_out).unwrap();
                    assert_eq!(expected_out.inner(), actual_out.inner());
                }
            }
        };
    }
    // Explicit SIMD processors are skipped under miri, which does not
    // interpret vendor intrinsics.
    #[cfg(target_arch = "x86_64")]
    #[cfg(not(miri))] test_processor!(
        super::super::ExplicitAvx2DoubleBlock,
        explicit_double_avx2,
        64
    );
    #[cfg(target_arch = "x86_64")]
    #[cfg(not(miri))] test_processor!(
        super::super::ExplicitAvx2SingleBlock,
        explicit_single_avx2,
        32
    );
    #[cfg(target_arch = "x86_64")]
    #[cfg(not(miri))] test_processor!(super::super::ExplicitSse2, explicit_sse2, 32);
    #[cfg(target_arch = "x86_64")]
    #[cfg(not(miri))] test_processor!(super::super::AutoAvx2Block<32>, auto_avx2, 32);
    #[cfg(target_arch = "aarch64")]
    test_processor!(super::super::AutoNeonBlock<64>, auto_neon, 64);
    #[cfg(target_arch = "aarch64")]
    #[cfg(not(miri))] test_processor!(super::super::ExplicitNeon, explicit_neon, 32);
    test_processor!(super::super::AutoVanillaBlock<32>, auto, 32);
}