use std::arch::x86_64::*;
/// Shifts `a` up by two bytes (dropping its two most significant bytes) and
/// fills the two vacated least significant bytes with the two most
/// significant bytes of `b`.
///
/// Viewing `b` followed by `a` as one 64-byte sequence (least significant
/// byte first), the result is the 32-byte window starting at byte 30.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2.
#[target_feature(enable = "avx2")]
pub unsafe fn alignr256_14(a: __m256i, b: __m256i) -> __m256i {
    // `_mm256_alignr_epi8` works on each 128-bit lane independently, so a
    // plain call cannot carry bytes across the lane boundary. Build a helper
    // vector whose low lane is the high lane of `b` and whose high lane is
    // the low lane of `a`; each lane of `a` then gets paired with the 16
    // bytes that immediately precede it.
    let preceding = _mm256_permute2x128_si256(b, a, 0x21);
    // Per lane: take (a_lane : preceding_lane), shift right by 14 bytes, and
    // keep the low 16 bytes.
    _mm256_alignr_epi8(a, preceding, 14)
}
/// Shifts `a` up by one byte (dropping its most significant byte) and fills
/// the vacated least significant byte with the most significant byte of `b`.
///
/// Viewing `b` followed by `a` as one 64-byte sequence (least significant
/// byte first), the result is the 32-byte window starting at byte 31.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2.
#[target_feature(enable = "avx2")]
pub unsafe fn alignr256_15(a: __m256i, b: __m256i) -> __m256i {
    // `_mm256_alignr_epi8` operates per 128-bit lane, so first assemble a
    // vector holding, for each lane of `a`, the 16 bytes that precede it:
    // low lane = high lane of `b`, high lane = low lane of `a`.
    let preceding = _mm256_permute2x128_si256(b, a, 0x21);
    // Per lane: take (a_lane : preceding_lane), shift right by 15 bytes, and
    // keep the low 16 bytes.
    _mm256_alignr_epi8(a, preceding, 15)
}
/// Splits a 128-bit vector into its two 64-bit lanes.
///
/// Element 0 of the returned array is the least significant 64-bit lane of
/// `a`; element 1 is the most significant lane.
///
/// # Safety
///
/// The caller must ensure the running CPU supports SSSE3.
#[target_feature(enable = "ssse3")]
pub unsafe fn unpack64x128(a: __m128i) -> [u64; 2] {
    let low_lane = _mm_cvtsi128_si64(a);
    // Byte-shift the upper lane down so it can be read out the same way.
    let high_lane = _mm_cvtsi128_si64(_mm_srli_si128(a, 8));
    [low_lane as u64, high_lane as u64]
}
/// Splits a 256-bit vector into its four 64-bit lanes.
///
/// The returned array is ordered from the least significant 64-bit lane of
/// `a` (element 0) to the most significant (element 3).
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2.
#[target_feature(enable = "avx2")]
pub unsafe fn unpack64x256(a: __m256i) -> [u64; 4] {
    // Break the vector into its two 128-bit halves first.
    let lower_half = _mm256_extracti128_si256(a, 0);
    let upper_half = _mm256_extracti128_si256(a, 1);

    // Within each half, the upper 64 bits are byte-shifted down before being
    // read out.
    let lane0 = _mm_cvtsi128_si64(lower_half);
    let lane1 = _mm_cvtsi128_si64(_mm_srli_si128(lower_half, 8));
    let lane2 = _mm_cvtsi128_si64(upper_half);
    let lane3 = _mm_cvtsi128_si64(_mm_srli_si128(upper_half, 8));

    [lane0 as u64, lane1 as u64, lane2 as u64, lane3 as u64]
}
/// Extracts the low 128 bits of `a` and of `b` as four 64-bit integers.
///
/// The result is `[a0, a1, b0, b1]`, where `x0` is the least significant
/// 64-bit lane of `x` and `x1` is the next one up. The upper 128 bits of both
/// inputs are ignored.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2.
#[target_feature(enable = "avx2")]
pub unsafe fn unpacklo64x256(a: __m256i, b: __m256i) -> [u64; 4] {
    // Only the low 128-bit half of each input is used.
    let a_low = _mm256_castsi256_si128(a);
    let b_low = _mm256_castsi256_si128(b);

    let a0 = _mm_cvtsi128_si64(a_low);
    let a1 = _mm_cvtsi128_si64(_mm_srli_si128(a_low, 8));
    let b0 = _mm_cvtsi128_si64(b_low);
    let b1 = _mm_cvtsi128_si64(_mm_srli_si128(b_low, 8));

    [a0 as u64, a1 as u64, b0 as u64, b1 as u64]
}
/// Returns true if and only if every bit of the 128-bit vector `a` is zero.
///
/// # Safety
///
/// The caller must ensure the running CPU supports SSSE3.
#[target_feature(enable = "ssse3")]
pub unsafe fn is_all_zeroes128(a: __m128i) -> bool {
    // The byte mask has one bit per byte lane; all 16 bits are set exactly
    // when every byte compared equal to zero.
    const EVERY_BYTE_MATCHED: i32 = 0xFFFF;

    let zero_bytes = _mm_cmpeq_epi8(a, zeroes128());
    _mm_movemask_epi8(zero_bytes) == EVERY_BYTE_MATCHED
}
/// Returns true if and only if every bit of the 256-bit vector `a` is zero.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2.
#[target_feature(enable = "avx2")]
pub unsafe fn is_all_zeroes256(a: __m256i) -> bool {
    let zero_bytes = _mm256_cmpeq_epi8(a, zeroes256());
    // The byte mask has one bit per byte lane. With all 32 bits set the
    // `i32` mask equals -1, which happens exactly when every byte compared
    // equal to zero.
    _mm256_movemask_epi8(zero_bytes) == -1
}
/// Loads a 128-bit vector from the 16 bytes starting at `slice[at]`.
///
/// The load is unaligned, so `slice[at..]` may start at any address. On x86
/// (little-endian), `slice[at]` becomes the least significant byte of the
/// returned vector.
///
/// # Safety
///
/// No bounds checks are performed. The caller must guarantee that
/// `at + 16 <= slice.len()`, and that the running CPU supports SSE2.
#[target_feature(enable = "sse2")]
pub unsafe fn loadu128(slice: &[u8], at: usize) -> __m128i {
    let tail: &[u8] = slice.get_unchecked(at..);
    _mm_loadu_si128(tail.as_ptr() as *const __m128i)
}
/// Loads a 256-bit vector from the 32 bytes starting at `slice[at]`.
///
/// The load is unaligned, so `slice[at..]` may start at any address. On x86
/// (little-endian), `slice[at]` becomes the least significant byte of the
/// returned vector.
///
/// # Safety
///
/// No bounds checks are performed. The caller must guarantee that
/// `at + 32 <= slice.len()`, and that the running CPU supports AVX2.
#[target_feature(enable = "avx2")]
pub unsafe fn loadu256(slice: &[u8], at: usize) -> __m256i {
    let tail: &[u8] = slice.get_unchecked(at..);
    _mm256_loadu_si256(tail.as_ptr() as *const __m256i)
}
/// Returns a 128-bit vector with every bit cleared.
///
/// # Safety
///
/// The caller must ensure the running CPU supports SSE2.
#[target_feature(enable = "sse2")]
pub unsafe fn zeroes128() -> __m128i {
    _mm_setzero_si128()
}
/// Returns a 256-bit vector with every bit cleared.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2.
#[target_feature(enable = "avx2")]
pub unsafe fn zeroes256() -> __m256i {
    _mm256_setzero_si256()
}
/// Returns a 128-bit vector with every bit set.
///
/// # Safety
///
/// The caller must ensure the running CPU supports SSE2.
#[target_feature(enable = "sse2")]
pub unsafe fn ones128() -> __m128i {
    // -1 as an `i8` is the byte 0xFF, broadcast to all 16 byte lanes.
    _mm_set1_epi8(-1)
}
/// Returns a 256-bit vector with every bit set.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2.
#[target_feature(enable = "avx2")]
pub unsafe fn ones256() -> __m256i {
    // -1 as an `i8` is the byte 0xFF, broadcast to all 32 byte lanes.
    _mm256_set1_epi8(-1)
}