#![allow(unsafe_code)]
use core::mem::transmute;
/// Declares a `const` holding the `Texel` descriptor of a type, panicking if there is none.
///
/// Invoked as `expect_texel!(const NAME: image_texel::Texel<T> = ...);` where the trailing `...`
/// is matched literally. The expansion calls `Texel::<T>::for_type()` inside a constant
/// initializer, so a `None` result panics during constant evaluation (i.e. it surfaces as a
/// compile error, as the panic message says) instead of at runtime.
///
/// The matcher spells the type as `image_texel::Texel`, but the expansion names `Texel`
/// unqualified, so `Texel` has to be resolvable wherever the macro is invoked.
// NOTE(review): the `allow(unused)` presumably exists because only some cfg'd modules invoke this
// macro — confirm against the users.
#[allow(unused)]
macro_rules! expect_texel {
(const $ident:ident: image_texel::Texel<$ty:ty> = ...) => {
const $ident: Texel<$ty> = match Texel::<$ty>::for_type() {
// The type has a texel descriptor: it becomes the constant's value.
Some(texel) => texel,
// No descriptor: abort constant evaluation with a diagnostic.
None => panic!("Compile error, unexpectedly non-texel"),
};
};
}
// x86-only kernel modules; `ShuffleOps::with_arch` picks functions from them after checking the
// matching CPU feature ("avx2" / "ssse3") at runtime.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod x86_avx2;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod x86_ssse3;
/// Dispatch table of channel-shuffle kernels, stored as plain function pointers.
///
/// `Default` fills every slot with the scalar implementation defined on this type, and
/// `with_arch` may afterwards replace some slots with CPU-specific versions.
///
/// In the scalar implementations each entry of the index array selects the source channel for
/// the output channel at the same position, and an index past the last source channel yields `0`.
// NOTE(review): the SIMD replacements are presumably expected to behave the same — confirm in
// the `x86_*` modules.
pub(crate) struct ShuffleOps {
/// Shuffles four-channel `u8` texels in place.
pub(crate) shuffle_u8x4: fn(&mut [[u8; 4]], [u8; 4]),
/// Reads three-channel `u8` texels and writes four-channel ones (one index per output channel).
pub(crate) shuffle_u8x3_to_u8x4: fn(&[[u8; 3]], &mut [[u8; 4]], [u8; 4]),
/// Reads four-channel `u8` texels and writes three-channel ones (one index per output channel).
pub(crate) shuffle_u8x4_to_u8x3: fn(&[[u8; 4]], &mut [[u8; 3]], [u8; 3]),
/// Shuffles four-channel `u16` texels in place.
pub(crate) shuffle_u16x4: fn(&mut [[u16; 4]], [u8; 4]),
/// Reads three-channel `u16` texels and writes four-channel ones (one index per output channel).
pub(crate) shuffle_u16x3_to_u16x4: fn(&[[u16; 3]], &mut [[u16; 4]], [u8; 4]),
/// Reads four-channel `u16` texels and writes three-channel ones (one index per output channel).
pub(crate) shuffle_u16x4_to_u16x3: fn(&[[u16; 4]], &mut [[u16; 3]], [u8; 3]),
}
impl ShuffleOps {
    /// Upgrades the table with SIMD kernels supported by the CPU this code is running on.
    ///
    /// Only the in-place `shuffle_u8x4` and `shuffle_u16x4` entries are ever replaced, and only
    /// on x86/x86_64: the AVX2 kernels are installed when AVX2 is detected, otherwise the SSSE3
    /// kernels when SSSE3 is detected, otherwise the table is returned unchanged.
    pub fn with_arch(mut self) -> Self {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            if std::is_x86_feature_detected!("avx2") {
                // SAFETY: `unsafe fn` and `fn` pointers with the same signature have the same
                // representation, and the runtime check above confirmed AVX2 is available.
                // NOTE(review): assumes AVX2 support is the kernels' only safety precondition —
                // confirm in `x86_avx2`.
                self.shuffle_u8x4 = unsafe {
                    transmute::<unsafe fn(&mut [[u8; 4]], [u8; 4]), _>(x86_avx2::shuffle_u8x4)
                };
                // SAFETY: as above, for the 16-bit kernel.
                self.shuffle_u16x4 = unsafe {
                    transmute::<unsafe fn(&mut [[u16; 4]], [u8; 4]), _>(x86_avx2::shuffle_u16x4)
                };
            } else if std::is_x86_feature_detected!("ssse3") {
                // SAFETY: `unsafe fn` and `fn` pointers with the same signature have the same
                // representation, and the runtime check above confirmed SSSE3 is available.
                // NOTE(review): assumes SSSE3 support is the kernels' only safety precondition —
                // confirm in `x86_ssse3`.
                self.shuffle_u8x4 = unsafe {
                    transmute::<unsafe fn(&mut [[u8; 4]], [u8; 4]), _>(x86_ssse3::shuffle_u8x4)
                };
                // SAFETY: as above, for the 16-bit kernel.
                self.shuffle_u16x4 = unsafe {
                    transmute::<unsafe fn(&mut [[u16; 4]], [u8; 4]), _>(x86_ssse3::shuffle_u16x4)
                };
            }
        }

        self
    }

    /// Scalar in-place shuffle of four-channel `u8` texels.
    ///
    /// Output channel `k` of every texel becomes the texel's original channel `idx[k]`; an index
    /// of 4 or more produces `0`.
    fn shuffle_u8x4(u8s: &mut [[u8; 4]], idx: [u8; 4]) {
        u8s.iter_mut().for_each(|texel| {
            // Snapshot first so every output channel is picked from the unmodified texel.
            let src = *texel;
            *texel = idx.map(|sel| src[usize::from(sel & 3)] & as_u8mask(sel < 4));
        });
    }

    /// Scalar in-place shuffle of four-channel `u16` texels.
    ///
    /// Output channel `k` of every texel becomes the texel's original channel `idx[k]`; an index
    /// of 4 or more produces `0`.
    fn shuffle_u16x4(u8s: &mut [[u16; 4]], idx: [u8; 4]) {
        u8s.iter_mut().for_each(|texel| {
            // Snapshot first so every output channel is picked from the unmodified texel.
            let src = *texel;
            *texel = idx.map(|sel| src[usize::from(sel & 3)] & as_u16mask(sel < 4));
        });
    }

    /// Scalar shuffle that expands three-channel `u8` texels from `u3` into four-channel texels
    /// in `u4`.
    ///
    /// Texels are paired up positionally and processing stops at the end of the shorter slice.
    /// Output channel `k` is source channel `idx[k]`; an index of 3 or more produces `0`.
    fn shuffle_u8x3_to_u8x4(u3: &[[u8; 3]], u4: &mut [[u8; 4]], idx: [u8; 4]) {
        for (src, dst) in u3.iter().zip(u4.iter_mut()) {
            *dst = idx.map(|sel| {
                // Clamp so the lookup stays in bounds; the mask zeroes out-of-range selections.
                let channel = usize::from(sel.min(2));
                src[channel] & as_u8mask(sel < 3)
            });
        }
    }

    /// Scalar shuffle that reduces four-channel `u8` texels from `u4` into three-channel texels
    /// in `u3`.
    ///
    /// Texels are paired up positionally and processing stops at the end of the shorter slice.
    /// Output channel `k` is source channel `idx[k]`; an index of 4 or more produces `0`.
    fn shuffle_u8x4_to_u8x3(u4: &[[u8; 4]], u3: &mut [[u8; 3]], idx: [u8; 3]) {
        for (src, dst) in u4.iter().zip(u3.iter_mut()) {
            *dst = idx.map(|sel| src[usize::from(sel & 3)] & as_u8mask(sel < 4));
        }
    }

    /// Scalar shuffle that expands three-channel `u16` texels from `u3` into four-channel texels
    /// in `u4`.
    ///
    /// Texels are paired up positionally and processing stops at the end of the shorter slice.
    /// Output channel `k` is source channel `idx[k]`; an index of 3 or more produces `0`.
    fn shuffle_u16x3_to_u16x4(u3: &[[u16; 3]], u4: &mut [[u16; 4]], idx: [u8; 4]) {
        for (src, dst) in u3.iter().zip(u4.iter_mut()) {
            *dst = idx.map(|sel| {
                // Clamp so the lookup stays in bounds; the mask zeroes out-of-range selections.
                let channel = usize::from(sel.min(2));
                src[channel] & as_u16mask(sel < 3)
            });
        }
    }

    /// Scalar shuffle that reduces four-channel `u16` texels from `u4` into three-channel texels
    /// in `u3`.
    ///
    /// Texels are paired up positionally and processing stops at the end of the shorter slice.
    /// Output channel `k` is source channel `idx[k]`; an index of 4 or more produces `0`.
    fn shuffle_u16x4_to_u16x3(u4: &[[u16; 4]], u3: &mut [[u16; 3]], idx: [u8; 3]) {
        for (src, dst) in u4.iter().zip(u3.iter_mut()) {
            *dst = idx.map(|sel| src[usize::from(sel & 3)] & as_u16mask(sel < 4));
        }
    }
}
impl Default for ShuffleOps {
fn default() -> Self {
ShuffleOps {
shuffle_u8x4: Self::shuffle_u8x4,
shuffle_u8x3_to_u8x4: Self::shuffle_u8x3_to_u8x4,
shuffle_u8x4_to_u8x3: Self::shuffle_u8x4_to_u8x3,
shuffle_u16x4: Self::shuffle_u16x4,
shuffle_u16x3_to_u16x4: Self::shuffle_u16x3_to_u16x4,
shuffle_u16x4_to_u16x3: Self::shuffle_u16x4_to_u16x3,
}
}
}
/// Turns a condition into a byte-wide bit mask: `0xFF` when `c` is true, `0x00` otherwise.
fn as_u8mask(c: bool) -> u8 {
    // `true` converts to 1, whose two's-complement negation has every bit set; 0 stays 0.
    u8::from(c).wrapping_neg()
}
/// Turns a condition into a 16-bit mask: `0xFFFF` when `c` is true, `0x0000` otherwise.
fn as_u16mask(c: bool) -> u16 {
    // `true` converts to 1, whose two's-complement negation has every bit set; 0 stays 0.
    u16::from(c).wrapping_neg()
}