use core::ops::BitAnd;
use super::*;
/// Marker type standing for the `ssse3` target feature.
///
/// The only field is a private `()`, so the type carries no data and cannot
/// be built with a tuple-struct literal by code that cannot see the field.
/// In this file values are produced by `SSSE3::new` (only compiled when
/// `ssse3` is statically enabled, or for docs) and by `Features::query`.
#[derive(Clone, Copy)]
pub struct SSSE3(());
impl SSSE3 {
    /// Creates the marker without any runtime check.
    ///
    /// This constructor only exists when the crate is compiled with the
    /// `ssse3` target feature enabled (or while building documentation), as
    /// expressed by the `cfg` attribute below.
    #[cfg(any(doc, target_feature = "ssse3"))]
    pub const fn new() -> Self {
        SSSE3(())
    }
}
// `Default` is gated by the same `cfg` as `SSSE3::new`, which it calls.
#[cfg(any(doc, target_feature = "ssse3"))]
impl Default for SSSE3 {
    /// Returns the same value as [`SSSE3::new`].
    fn default() -> Self {
        SSSE3::new()
    }
}
impl Features<SSE> for SSSE3 {
    /// Returns the marker when `support` reports SSSE3, and `None` otherwise.
    fn query(support: &RuntimeSupport) -> Option<Self> {
        if support.ssse3() {
            Some(Self(()))
        } else {
            None
        }
    }
}
// Marks `SSSE3` as a `Feature<SSE>`; this impl supplies no items of its own.
// NOTE(review): presumably `Feature` identifies a single feature while
// `Features` covers feature sets — confirm against the trait definitions.
impl Feature<SSE> for SSSE3 {}
// Lane-wise absolute value for `[i8; 16]` vectors, annotated as the
// counterpart of `pabsb` / `_mm_abs_epi8`.
impl_simd!(SSE[SSSE3: "ssse3"], [i8; 16] {
    #[intrinsic_for("pabsb")]
    #[intel_equivalents("_mm_abs_epi8")]
    pub fn abs(self => this) -> Self {
        let lanes = this.primitive;
        let zero = vector![0i8; 16].primitive;
        // Mask of the lanes that compare below zero.
        let is_negative = simd_lt::<_, i8x16>(lanes, zero);
        // Negated value where the mask is set, the untouched lane elsewhere.
        // The tests in this file compare the result against `wrapping_abs`.
        simd_select(is_negative, simd_neg(lanes), lanes)
    }
});
// Lane-wise absolute value for `[i16; 8]` vectors, annotated as the
// counterpart of `pabsw` / `_mm_abs_epi16`.
impl_simd!(SSE[SSSE3: "ssse3"], [i16; 8] {
    #[intrinsic_for("pabsw")]
    #[intel_equivalents("_mm_abs_epi16")]
    pub fn abs(self => this) -> Self {
        let lanes = this.primitive;
        let zero = vector![0i16; 8].primitive;
        // Mask of the lanes that compare below zero.
        let is_negative = simd_lt::<_, i16x8>(lanes, zero);
        // Negated value where the mask is set, the untouched lane elsewhere.
        // The tests in this file compare the result against `wrapping_abs`.
        simd_select(is_negative, simd_neg(lanes), lanes)
    }
});
// Lane-wise absolute value for `[i32; 4]` vectors, annotated as the
// counterpart of `pabsd` / `_mm_abs_epi32`.
impl_simd!(SSE[SSSE3: "ssse3"], [i32; 4] {
    #[intrinsic_for("pabsd")]
    #[intel_equivalents("_mm_abs_epi32")]
    pub fn abs(self => this) -> Self {
        let lanes = this.primitive;
        let zero = vector![0i32; 4].primitive;
        // Mask of the lanes that compare below zero.
        let is_negative = simd_lt::<_, i32x4>(lanes, zero);
        // Negated value where the mask is set, the untouched lane elsewhere.
        // The tests in this file compare the result against `wrapping_abs`.
        simd_select(is_negative, simd_neg(lanes), lanes)
    }
});
// Byte-table lookups for `[u8; 16]` vectors.
impl_simd!(SSE[SSE2, SSSE3: "sse2,ssse3"], [u8; 16] {
    // Looks up each lane of `idxs` in `this`. Indices are first reduced to
    // their low four bits (`0x0F`) and then handed to `masking_index`; the
    // tests in this file expect `v[i % 16]` for every lane.
    pub fn index(self => this, idxs: Vector<[u8; 16], SSE, FS>) -> Self {
        let wrapped = idxs.bitand(imm!(0x0F, u8));
        this.masking_index(wrapped.cast()).primitive
    }
    // Forwards directly to the LLVM intrinsic named below. The tests in this
    // file expect a zero lane for every negative index and `v[i % 16]`
    // for the others.
    #[intrinsic_for("pshufb")]
    #[intel_equivalents("_mm_shuffle_epi8")]
    pub fn masking_index(
        self => this,
        idxs: Vector<[i8; 16], SSE, FS>,
    ) -> Self {
        decl_llvm_func!(
            "llvm.x86.ssse3.pshuf.b.128" as
            pshufb(v: u8x16, i: i8x16) -> u8x16);
        let table = this.primitive;
        let selectors = idxs.primitive;
        pshufb(table, selectors)
    }
});
// Byte-table lookups for `[i8; 16]` vectors.
impl_simd!(SSE[SSE2, SSSE3: "sse2,ssse3"], [i8; 16] {
    // Looks up each lane of `idxs` in `this`. Indices are first reduced to
    // their low four bits (`0x0F`) and then handed to `masking_index`; the
    // tests in this file expect `v[i % 16]` for every lane.
    pub fn index(self => this, idxs: Vector<[u8; 16], SSE, FS>) -> Self {
        let wrapped = idxs.bitand(imm!(0x0F, u8));
        this.masking_index(wrapped.cast()).primitive
    }
    // Forwards directly to the LLVM intrinsic named below. The tests in this
    // file expect a zero lane for every negative index and `v[i % 16]`
    // for the others.
    #[intrinsic_for("pshufb")]
    #[intel_equivalents("_mm_shuffle_epi8")]
    pub fn masking_index(
        self => this,
        idxs: Vector<[i8; 16], SSE, FS>,
    ) -> Self {
        decl_llvm_func!(
            "llvm.x86.ssse3.pshuf.b.128" as
            pshufb(v: i8x16, i: i8x16) -> i8x16);
        let table = this.primitive;
        let selectors = idxs.primitive;
        pshufb(table, selectors)
    }
});
// Sliding 16-byte window over two `[u8; 16]` vectors, annotated as the
// counterpart of `palignr` / `_mm_alignr_epi8`.
impl_simd!(SSE[SSSE3: "ssse3"], [u8; 16] {
    // The tests in this file expect bytes `Offset..Offset + 16` of the
    // 32-byte sequence formed by `this` followed by `that`.
    #[intrinsic_for("palignr")]
    #[intel_equivalents("_mm_alignr_epi8")]
    pub fn concat_and_slice<Offset: Imm<u8>>(
        self => this, that: Self, _offset: Offset
    ) -> Self {
        // Offsets past the end of the second vector are rejected.
        const_assert!(Offset::VAL <= 16);
        match Offset::VAL {
            // The window coincides with one of the inputs: no shuffle needed.
            0 => this.primitive,
            16 => that.primitive,
            // Otherwise pick the lanes with a constant index table.
            _ => simd_shuffle(this.primitive, that.primitive, const {
                simd_slice_indices::<16>(Offset::VAL as usize)
            }),
        }
    }
});
// Sliding 16-byte window over two `[i8; 16]` vectors, annotated as the
// counterpart of `palignr` / `_mm_alignr_epi8`.
impl_simd!(SSE[SSSE3: "ssse3"], [i8; 16] {
    // The tests in this file expect bytes `Offset..Offset + 16` of the
    // 32-byte sequence formed by `this` followed by `that`.
    #[intrinsic_for("palignr")]
    #[intel_equivalents("_mm_alignr_epi8")]
    pub fn concat_and_slice<Offset: Imm<u8>>(
        self => this, that: Self, _offset: Offset
    ) -> Self {
        // Offsets past the end of the second vector are rejected.
        const_assert!(Offset::VAL <= 16);
        match Offset::VAL {
            // The window coincides with one of the inputs: no shuffle needed.
            0 => this.primitive,
            16 => that.primitive,
            // Otherwise pick the lanes with a constant index table.
            _ => simd_shuffle(this.primitive, that.primitive, const {
                simd_slice_indices::<16>(Offset::VAL as usize)
            }),
        }
    }
});
// Property tests for the operations defined in this file. The module is only
// built for test runs with `ssse3` statically enabled, since it constructs
// the feature marker through `SSSE3::new()`.
#[cfg(all(test, target_feature = "ssse3"))]
mod tests {
    use core::fmt::Debug;
    use super::*;
    use proptest::prelude::*;
    use proptest::test_runner::TestCaseResult;

    // Feature set shared by every test: SSE2 plus SSSE3.
    type Feats = features!(SSE2, SSSE3);
    const FEATS: FeatureSet<SSE, Feats> =
        FeatureSet::new((SSE2::new(), (SSSE3::new(), ())));

    // Shorthand for an SSE vector carrying the test feature set.
    type Vector<T> = super::Vector<T, SSE, Feats>;

    // Loads a plain array into a vector using the shared feature set.
    fn make<E, const LEN: usize>(lanes: [E; LEN]) -> Vector<[E; LEN]>
    where
        E: Element<LEN> + Movable<SSE, LEN>,
    {
        super::Vector::load(&lanes, FEATS)
    }

    // Applies `vector_op` to the loaded vector and `scalar_op` to each
    // element of the array, then requires both results to be equal.
    fn test_una_map<T, R, const LEN: usize>(
        input: [T; LEN],
        vector_op: impl FnOnce(Vector<[T; LEN]>) -> Vector<[R; LEN]>,
        scalar_op: impl Fn(T) -> R,
    ) -> TestCaseResult
    where
        T: Element<LEN> + Movable<SSE, LEN>,
        R: Element<LEN> + PartialEq + Debug,
    {
        let actual = vector_op(make(input));
        let expected = input.map(scalar_op);
        prop_assert_eq!(actual.as_array(), &expected);
        Ok(())
    }

    // `abs` must agree with the scalar `wrapping_abs` on every lane.
    proptest! {
        #[test]
        fn abs_i8(v: [i8; 16]) {
            test_una_map(v, |vec| vec.abs(), |lane| lane.wrapping_abs())?;
        }
        #[test]
        fn abs_i16(v: [i16; 8]) {
            test_una_map(v, |vec| vec.abs(), |lane| lane.wrapping_abs())?;
        }
        #[test]
        fn abs_i32(v: [i32; 4]) {
            test_una_map(v, |vec| vec.abs(), |lane| lane.wrapping_abs())?;
        }
    }

    // `index` wraps indices modulo 16; `masking_index` additionally yields
    // zero for negative indices.
    proptest! {
        #[test]
        fn index_u8(v: [u8; 16], i: [u8; 16]) {
            let actual = make(v).index(make(i));
            let expected = i.map(|idx| v[usize::from(idx) % 16]);
            prop_assert_eq!(actual.as_array(), &expected);
        }
        #[test]
        fn masking_index_u8(v: [u8; 16], i: [i8; 16]) {
            let actual = make(v).masking_index(make(i));
            let expected = i.map(|sel| {
                if sel >= 0 { v[sel as usize % 16] } else { 0 }
            });
            prop_assert_eq!(actual.as_array(), &expected);
        }
        #[test]
        fn index_i8(v: [i8; 16], i: [u8; 16]) {
            let actual = make(v).index(make(i));
            let expected = i.map(|idx| v[usize::from(idx) % 16]);
            prop_assert_eq!(actual.as_array(), &expected);
        }
        #[test]
        fn masking_index_i8(v: [i8; 16], i: [i8; 16]) {
            let actual = make(v).masking_index(make(i));
            let expected = i.map(|sel| {
                if sel >= 0 { v[sel as usize % 16] } else { 0 }
            });
            prop_assert_eq!(actual.as_array(), &expected);
        }
    }

    // `concat_and_slice` must return the 16-byte window starting at the
    // given offset of the two halves laid end to end.
    proptest! {
        #[test]
        fn concat_and_slice_u8(v: [u8; 32]) {
            let (lo_half, hi_half) = v.split_at(16);
            let lo = Vector::<[u8; 16]>::load(lo_half.try_into().unwrap(), FEATS);
            let hi = Vector::<[u8; 16]>::load(hi_half.try_into().unwrap(), FEATS);

            let window = lo.concat_and_slice(hi, imm!(0, u8));
            prop_assert_eq!(window.as_array(), &v[..16]);
            let window = lo.concat_and_slice(hi, imm!(1, u8));
            prop_assert_eq!(window.as_array(), &v[1..17]);
            let window = lo.concat_and_slice(hi, imm!(8, u8));
            prop_assert_eq!(window.as_array(), &v[8..24]);
            let window = lo.concat_and_slice(hi, imm!(16, u8));
            prop_assert_eq!(window.as_array(), &v[16..32]);
        }
        #[test]
        fn concat_and_slice_i8(v: [i8; 32]) {
            let (lo_half, hi_half) = v.split_at(16);
            let lo = Vector::<[i8; 16]>::load(lo_half.try_into().unwrap(), FEATS);
            let hi = Vector::<[i8; 16]>::load(hi_half.try_into().unwrap(), FEATS);

            let window = lo.concat_and_slice(hi, imm!(0, u8));
            prop_assert_eq!(window.as_array(), &v[..16]);
            let window = lo.concat_and_slice(hi, imm!(1, u8));
            prop_assert_eq!(window.as_array(), &v[1..17]);
            let window = lo.concat_and_slice(hi, imm!(8, u8));
            prop_assert_eq!(window.as_array(), &v[8..24]);
            let window = lo.concat_and_slice(hi, imm!(16, u8));
            prop_assert_eq!(window.as_array(), &v[16..32]);
        }
    }
}