#![cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#![allow(dead_code)]
#[cfg(target_arch = "x86")]
use core::arch::x86::{__m128i, _mm_cmpeq_epi8, _mm_loadu_si128, _mm_movemask_epi8};
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::{
__m128i, _mm_cmpeq_epi8, _mm_crc32_u64, _mm_loadu_si128, _mm_movemask_epi8,
};
use super::scalar;
/// Short identifier string for the kernels defined in this module.
///
/// NOTE(review): presumably used to label which kernel variant is active;
/// the consumers are not visible from this file — confirm against callers.
pub(crate) const KERNEL_TAG: &str = "sse42";
/// Mixes a 64-bit value into a 64-bit hash using the hardware CRC32-C
/// instruction (x86_64 variant).
///
/// The CRC32-C of `value` (seeded with 0) is shifted into the upper 32 bits,
/// XORed with `value` rotated left by 13 bits, and the result is multiplied
/// (wrapping) by `scalar::HASH_MIX_PRIME`.
///
/// # Safety
///
/// The executing CPU must support SSE4.2; the function is compiled with that
/// target feature enabled.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse4.2")]
#[inline]
pub(crate) unsafe fn hash_mix_u64(value: u64) -> u64 {
    // SAFETY: SSE4.2 availability is this function's own precondition.
    let checksum = unsafe { _mm_crc32_u64(0, value) };
    // Move the checksum into the high half so it does not overlap the low
    // 32 bits of the rotated input.
    let high_half = checksum << 32;
    let rotated = value.rotate_left(13);
    let combined = high_half ^ rotated;
    combined.wrapping_mul(scalar::HASH_MIX_PRIME)
}
/// Mixes a 64-bit value into a 64-bit hash (32-bit x86 variant).
///
/// This build does not import the 64-bit CRC32 intrinsic, so the work is
/// delegated unchanged to `scalar::hash_mix_u64`.
///
/// # Safety
///
/// The function is compiled with the SSE4.2 target feature enabled, so the
/// executing CPU must support SSE4.2.
#[cfg(target_arch = "x86")]
#[target_feature(enable = "sse4.2")]
#[inline]
pub(crate) unsafe fn hash_mix_u64(value: u64) -> u64 {
scalar::hash_mix_u64(value)
}
/// Compares two byte ranges 16 bytes at a time and reports how far they agree.
///
/// Only whole 16-byte chunks that fit inside `max` are examined. If a chunk
/// contains a differing byte, the offset of the first differing byte is
/// returned. Otherwise the return value is the number of bytes covered by the
/// full chunks (`max` rounded down to a multiple of 16); any trailing bytes
/// beyond that are left unexamined.
///
/// # Safety
///
/// * The executing CPU must support SSE4.2.
/// * `lhs` and `rhs` must each be valid for reads of `max` bytes. No
///   particular alignment is required because unaligned loads are used.
#[target_feature(enable = "sse4.2")]
#[inline]
pub(crate) unsafe fn prefix_len_simd(lhs: *const u8, rhs: *const u8, max: usize) -> usize {
    const LANES: usize = 16;
    // One mask bit per byte lane; all sixteen set means the chunk is identical.
    const ALL_EQUAL: u32 = 0xFFFF;

    let full_chunks = max / LANES;
    for chunk in 0..full_chunks {
        let base = chunk * LANES;
        // SAFETY: `base + LANES <= max`, and the caller guarantees both
        // pointers are readable for `max` bytes.
        let left: __m128i = unsafe { _mm_loadu_si128(lhs.add(base).cast::<__m128i>()) };
        let right: __m128i = unsafe { _mm_loadu_si128(rhs.add(base).cast::<__m128i>()) };
        // SAFETY: these intrinsics only need the enabled target feature.
        let lane_eq = unsafe { _mm_cmpeq_epi8(left, right) };
        let equal_bits = unsafe { _mm_movemask_epi8(lane_eq) } as u32;
        if equal_bits != ALL_EQUAL {
            // Cleared bits mark differing lanes; the lowest one is the first
            // mismatching byte within this chunk.
            let first_diff = (!equal_bits).trailing_zeros() as usize;
            return base + first_diff;
        }
    }
    full_chunks * LANES
}
/// Computes a common-prefix length for two byte ranges of up to `max` bytes.
///
/// The 16-byte vector scan runs first; the offset it reached is then handed to
/// `scalar::common_prefix_len_scalar_ptr` together with `max`, and that
/// function's result is returned.
///
/// NOTE(review): the scalar helper presumably continues the byte-wise
/// comparison from the given offset up to `max` — confirm against `scalar`.
///
/// # Safety
///
/// * The executing CPU must support SSE4.2.
/// * `lhs` and `rhs` must each be valid for reads of `max` bytes.
#[target_feature(enable = "sse4.2")]
#[inline]
pub(crate) unsafe fn common_prefix_len_ptr(lhs: *const u8, rhs: *const u8, max: usize) -> usize {
    // SAFETY: the caller's guarantees are forwarded unchanged to the vector scan.
    let vector_matched = unsafe { prefix_len_simd(lhs, rhs, max) };
    // SAFETY: same pointers and bound as above; only the starting offset differs.
    let total_matched =
        unsafe { scalar::common_prefix_len_scalar_ptr(lhs, rhs, vector_matched, max) };
    total_matched
}
/// Extends a match between two positions of `concat` and returns its length.
///
/// The first `seed_len` bytes (clamped to `tail_limit`) are taken as already
/// matching and are not compared again. Comparison resumes right after them,
/// at `candidate_idx + seed` versus `current_idx + seed`, for at most
/// `tail_limit - seed` further bytes. The result is the seed plus the number
/// of additional matching bytes, and never exceeds `tail_limit`.
///
/// # Safety
///
/// * The executing CPU must support SSE4.2.
/// * No bounds checks are performed: both `candidate_idx + tail_limit` and
///   `current_idx + tail_limit` must stay within `concat`.
#[target_feature(enable = "sse4.2")]
#[inline]
pub(crate) unsafe fn count_match_from_indices(
    concat: &[u8],
    current_idx: usize,
    candidate_idx: usize,
    tail_limit: usize,
    seed_len: usize,
) -> usize {
    let verified = seed_len.min(tail_limit);
    let unverified = tail_limit - verified;
    if unverified == 0 {
        // The seed already covers everything the caller allows.
        return verified;
    }

    let start = concat.as_ptr();
    // SAFETY: the caller guarantees both offsets lie inside `concat`.
    let candidate_tail = unsafe { start.add(candidate_idx + verified) };
    let current_tail = unsafe { start.add(current_idx + verified) };
    // SAFETY: the caller guarantees `unverified` bytes are readable from each
    // of the two tail pointers.
    let extended = unsafe { common_prefix_len_ptr(candidate_tail, current_tail, unverified) };
    verified + extended
}