use crate::neon::utils::{neon_store_half_rgb8, neon_store_rgb8, neon_vld_h_rgb, neon_vld_rgb};
use crate::shuffle::ShuffleConverter;
use crate::yuv_support::YuvSourceChannels;
/// Field-less NEON implementation of [`ShuffleConverter`] for 8-bit pixel data.
///
/// `SRC` and `DST` are the `u8` encodings of the source and destination
/// channel layouts; the conversion routine turns them into
/// [`YuvSourceChannels`] values via `Into`.
pub(crate) struct ShuffleConverterNeon<const SRC: u8, const DST: u8> {}
impl<const SRC: u8, const DST: u8> Default for ShuffleConverterNeon<SRC, DST> {
fn default() -> Self {
ShuffleConverterNeon {}
}
}
/// Routes the generic [`ShuffleConverter`] entry point to the NEON kernel
/// specialised for the `SRC` -> `DST` layout pair.
impl<const SRC: u8, const DST: u8> ShuffleConverter<u8, SRC, DST>
    for ShuffleConverterNeon<SRC, DST>
{
    /// Shuffles the channels of `src` into `dst`.
    ///
    /// `width` is forwarded as-is; the kernel sizes its work from the slice
    /// lengths and does not read it.
    fn convert(&self, src: &[u8], dst: &mut [u8], width: usize) {
        // SAFETY: NOTE(review): relies on NEON being available on the target
        // and on the kernel only touching memory inside the two slices it is
        // handed - confirm against the build configuration and `neon::utils`.
        unsafe {
            shuffle_channels8_impl::<SRC, DST>(src, dst, width);
        }
    }
}
/// Reorders the channels of a run of 8-bit pixels from layout `SRC` to layout
/// `DST` using the NEON load/store helpers.
///
/// The work is split into three stages:
/// 1. groups of `16 * channel_count` bytes, handled with `neon_vld_rgb` /
///    `neon_store_rgb8`;
/// 2. of what is left, groups of `8 * channel_count` bytes, handled with
///    `neon_vld_h_rgb` / `neon_store_half_rgb8`;
/// 3. a final partial group, staged through zero-filled stack buffers so the
///    half-width helpers never touch memory outside the caller's slices.
///
/// The amount of work is derived from the slice lengths only; the third
/// (unnamed) parameter is ignored. In stages 1 and 2 the source and
/// destination groups are paired with `zip`, so the shorter side bounds the
/// number of groups processed. Stage 3 is skipped when either remainder is
/// empty.
///
/// # Panics
///
/// Panics if a channel count is zero, or if a stage-3 remainder is larger than
/// the 64-byte staging buffer.
///
/// # Safety
///
/// NOTE(review): the caller is presumably required to run this only on a CPU
/// with NEON support, and `SRC` / `DST` must be valid layout encodings for the
/// helpers in `neon::utils` - confirm against those helpers.
#[inline(always)]
unsafe fn shuffle_channels8_impl<const SRC: u8, const DST: u8>(
    src: &[u8],
    dst: &mut [u8],
    _: usize,
) {
    /// Size in bytes of each stack buffer used for the final partial group.
    const STAGING_LEN: usize = 64;

    let src_channels: YuvSourceChannels = SRC.into();
    let dst_channels: YuvSourceChannels = DST.into();
    let src_cn = src_channels.get_channels_count();
    let dst_cn = dst_channels.get_channels_count();

    // Stage 1: full-width groups. Splitting once yields both the part made of
    // whole groups and the remainder handed to the next stage.
    let src_wide_end = src.len() - src.len() % (16 * src_cn);
    let dst_wide_end = dst.len() - dst.len() % (16 * dst_cn);
    let (src_wide, src) = src.split_at(src_wide_end);
    let (dst_wide, dst) = dst.split_at_mut(dst_wide_end);
    for (src_group, dst_group) in src_wide
        .chunks_exact(16 * src_cn)
        .zip(dst_wide.chunks_exact_mut(16 * dst_cn))
    {
        let (a0, b0, c0, d0) = neon_vld_rgb::<SRC>(src_group.as_ptr());
        neon_store_rgb8::<DST>(dst_group.as_mut_ptr(), a0, b0, c0, d0);
    }

    // Stage 2: half-width groups taken from the stage-1 remainders.
    let src_half_end = src.len() - src.len() % (8 * src_cn);
    let dst_half_end = dst.len() - dst.len() % (8 * dst_cn);
    let (src_half, src) = src.split_at(src_half_end);
    let (dst_half, dst) = dst.split_at_mut(dst_half_end);
    for (src_group, dst_group) in src_half
        .chunks_exact(8 * src_cn)
        .zip(dst_half.chunks_exact_mut(8 * dst_cn))
    {
        let (a0, b0, c0, d0) = neon_vld_h_rgb::<SRC>(src_group.as_ptr());
        neon_store_half_rgb8::<DST>(dst_group.as_mut_ptr(), a0, b0, c0, d0);
    }

    // Stage 3: the final partial group. The half-width helpers work on a
    // whole group, so the data is staged through zeroed buffers and only the
    // bytes that belong to `dst` are copied back.
    if !src.is_empty() && !dst.is_empty() {
        let mut staged_src = [0u8; STAGING_LEN];
        let mut staged_dst = [0u8; STAGING_LEN];

        // Bounds-checked copy: panics rather than overrunning the staging
        // buffer should the remainder ever exceed it.
        staged_src[..src.len()].copy_from_slice(src);

        // NOTE(review): assumes the half-width helpers access at most
        // `STAGING_LEN` bytes through these pointers - confirm in `neon::utils`.
        let (a0, b0, c0, d0) = neon_vld_h_rgb::<SRC>(staged_src.as_ptr());
        neon_store_half_rgb8::<DST>(staged_dst.as_mut_ptr(), a0, b0, c0, d0);

        let dst_len = dst.len();
        dst.copy_from_slice(&staged_dst[..dst_len]);
    }
}