use std::arch::x86_64::*;
pub(crate) fn contains_simd_u32(vector: &[u32], target: u32) -> bool {
let target_lane = unsafe { _mm256_set1_epi32(target as i32) };
let mut remaining = vector;
while remaining.len() >= 32 {
if compare_32_items_u32(remaining.as_ptr(), target_lane) {
return true;
}
remaining = unsafe { remaining.get_unchecked(32..) };
continue;
}
if remaining.len() >= 16 {
if compare_16_items_u32(remaining.as_ptr(), target_lane) {
return true;
}
remaining = unsafe { remaining.get_unchecked(16..) };
}
if remaining.len() >= 8 {
if compare_8_items_u32(remaining.as_ptr(), target_lane) {
return true;
}
remaining = unsafe { remaining.get_unchecked(8..) };
}
if remaining.len() >= 4 {
if compare_4_items_u32(remaining.as_ptr(), target) {
return true;
}
remaining = unsafe { remaining.get_unchecked(4..) };
}
for val in remaining {
if *val == target {
return true;
}
}
false
}
#[inline(always)]
fn compare_32_items_u32(chunk: *const u32, target_lane: __m256i) -> bool {
compare_16_items_u32(chunk, target_lane)
|| compare_16_items_u32(
unsafe { chunk.add(16) },
target_lane,
)
}
#[inline(always)]
fn compare_16_items_u32(chunk: *const u32, target_lane: __m256i) -> bool {
compare_8_items_u32(chunk, target_lane)
|| compare_8_items_u32(
unsafe { chunk.add(8) },
target_lane,
)
}
#[inline(always)]
fn compare_8_items_u32(chunk: *const u32, target_lane: __m256i) -> bool {
unsafe {
let current_block = _mm256_loadu_si256(chunk as *const __m256i);
let comparison = _mm256_cmpeq_epi32(current_block, target_lane);
_mm256_movemask_ps(_mm256_castsi256_ps(comparison)) != 0
}
}
#[inline(always)]
fn compare_4_items_u32(chunk: *const u32, target: u32) -> bool {
unsafe {
let current_block = _mm_loadu_si128(chunk as *const __m128i);
let target_lane = _mm_set1_epi32(target as i32);
let comparison = _mm_cmpeq_epi32(current_block, target_lane);
_mm_movemask_ps(_mm_castsi128_ps(comparison)) != 0
}
}
pub(crate) fn contains_simd_u64(vector: &[u64], target: u64) -> bool {
let target_lane = unsafe { _mm256_set1_epi64x(target as i64) };
let mut remaining = vector;
while remaining.len() >= 32 {
if compare_32_items_u64(remaining.as_ptr(), target_lane) {
return true;
}
remaining = unsafe { remaining.get_unchecked(32..) };
continue;
}
if remaining.len() >= 16 {
if compare_16_items_u64(remaining.as_ptr(), target_lane) {
return true;
}
remaining = unsafe { remaining.get_unchecked(16..) };
}
if remaining.len() >= 8 {
if compare_8_items_u64(remaining.as_ptr(), target_lane) {
return true;
}
remaining = unsafe { remaining.get_unchecked(8..) };
}
if remaining.len() >= 4 {
if compare_4_items_u64(remaining.as_ptr(), target_lane) {
return true;
}
remaining = unsafe { remaining.get_unchecked(4..) };
}
if remaining.len() >= 2 {
if compare_2_items_u64(remaining.as_ptr(), target) {
return true;
}
remaining = unsafe { remaining.get_unchecked(2..) };
}
for val in remaining {
if *val == target {
return true;
}
}
false
}
#[inline(always)]
fn compare_32_items_u64(chunk: *const u64, target_lane: __m256i) -> bool {
compare_16_items_u64(chunk, target_lane)
|| compare_16_items_u64(
unsafe { chunk.add(16) },
target_lane,
)
}
#[inline(always)]
fn compare_16_items_u64(chunk: *const u64, target_lane: __m256i) -> bool {
compare_8_items_u64(chunk, target_lane)
|| compare_8_items_u64(
unsafe { chunk.add(8) },
target_lane,
)
}
#[inline(always)]
fn compare_8_items_u64(chunk: *const u64, target_lane: __m256i) -> bool {
compare_4_items_u64(chunk, target_lane)
|| compare_4_items_u64(
unsafe { chunk.add(4) },
target_lane,
)
}
#[inline(always)]
fn compare_4_items_u64(chunk: *const u64, target_lane: __m256i) -> bool {
unsafe {
let current_block = _mm256_loadu_si256(chunk as *const __m256i);
let comparison = _mm256_cmpeq_epi64(current_block, target_lane);
_mm256_movemask_pd(_mm256_castsi256_pd(comparison)) != 0
}
}
#[inline(always)]
fn compare_2_items_u64(chunk: *const u64, target: u64) -> bool {
unsafe {
let current_block = _mm_loadu_si128(chunk as *const __m128i);
let target_lane = _mm_set1_epi64x(target as i64);
let comparison = _mm_cmpeq_epi64(current_block, target_lane);
_mm_movemask_pd(_mm_castsi128_pd(comparison)) != 0
}
}