use crate::types::Pos;
/// Returns `true` when the bytes of `input` starting at offset `pos` are
/// exactly `lit`.
///
/// Returns `false` when `lit` does not fit inside `input` at that offset,
/// including the case where `pos + lit.len()` would overflow `usize`. An
/// empty `lit` matches at every offset up to and including `input.len()`.
///
/// On x86_64, literals of at least 16 bytes are compared with SSE2, and
/// literals of at least 32 bytes with AVX2 when the `std` feature is enabled
/// and the CPU reports AVX2 support at runtime. All other cases use a plain
/// slice comparison.
#[must_use]
#[inline]
pub fn literal_eq(input: &[u8], pos: Pos, lit: &[u8]) -> bool {
    let n = lit.len();
    // NOTE(review): `as` truncates silently if `Pos` is wider than `usize`;
    // confirm against the definition of `Pos`.
    let off = pos as usize;

    // A plain `off + n` can wrap around for a huge `pos`; the wrapped `end`
    // would then pass the bounds check and let the raw-pointer paths below
    // read outside `input`. Treat overflow as "does not fit".
    let end = match off.checked_add(n) {
        Some(end) if end <= input.len() => end,
        _ => return false,
    };
    if n == 0 {
        return true;
    }

    #[cfg(target_arch = "x86_64")]
    {
        let a = input.as_ptr();
        let b = lit.as_ptr();

        // Runtime detection needs `std`; without it only SSE2 is used.
        #[cfg(feature = "std")]
        let has_avx2 = is_x86_feature_detected!("avx2");
        #[cfg(not(feature = "std"))]
        let has_avx2 = false;

        if n >= 32 && has_avx2 {
            // SAFETY: `off + n == end <= input.len()`, so `a.add(off)` is
            // readable for `n` bytes; `b` points at `lit`, which is `n` bytes
            // long; AVX2 support was detected just above.
            return unsafe { eq_avx2(a.add(off), b, n) };
        }
        if n >= 16 {
            // SAFETY: same bounds argument as above; SSE2 is part of the
            // x86_64 baseline.
            return unsafe { eq_sse2(a.add(off), b, n) };
        }
    }

    input[off..end] == *lit
}
/// Loads 16 bytes starting at `p` into a 128-bit vector using an unaligned
/// load.
///
/// # Safety
///
/// `p` must be valid for reading 16 bytes. No alignment is required.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
// The cast only feeds an unaligned-load intrinsic, so the stricter alignment
// of `__m128i` is never relied upon.
#[allow(clippy::cast_ptr_alignment)]
#[inline]
unsafe fn load_u128(p: *const u8) -> core::arch::x86_64::__m128i {
    use core::arch::x86_64::{__m128i, _mm_loadu_si128};

    let src = p.cast::<__m128i>();
    // SAFETY: the caller guarantees 16 readable bytes at `p`.
    unsafe { _mm_loadu_si128(src) }
}
/// Compares the `n` bytes at `a` with the `n` bytes at `b`, 16 bytes at a
/// time with SSE2, finishing any remainder shorter than 16 bytes byte by byte.
///
/// Returns `false` as soon as a differing chunk is found.
///
/// # Safety
///
/// `a` and `b` must each be valid for reading `n` bytes.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
unsafe fn eq_sse2(a: *const u8, b: *const u8, n: usize) -> bool {
    use core::arch::x86_64::{_mm_cmpeq_epi8, _mm_movemask_epi8};

    const LANES: usize = 16;
    // One mask bit per byte lane; all 16 set means every byte matched.
    const ALL_EQUAL: i32 = 0xFFFF;

    // Length of the prefix that consists of whole 16-byte chunks.
    let vector_len = n - n % LANES;

    let mut offset = 0usize;
    while offset < vector_len {
        // SAFETY: `offset + 16 <= vector_len <= n`, and the caller guarantees
        // `n` readable bytes behind both pointers.
        let (lhs, rhs) = unsafe { (load_u128(a.add(offset)), load_u128(b.add(offset))) };
        let mask = _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs));
        if mask != ALL_EQUAL {
            return false;
        }
        offset += LANES;
    }

    // SAFETY: `vector_len <= n`, so the remaining `n - vector_len` bytes are
    // still inside the ranges the caller vouched for.
    unsafe { byte_eq_tail(a.add(vector_len), b.add(vector_len), n - vector_len) }
}
/// Loads 32 bytes starting at `p` into a 256-bit vector using an unaligned
/// load.
///
/// # Safety
///
/// `p` must be valid for reading 32 bytes, and the running CPU must support
/// AVX2. No alignment is required.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
// The cast only feeds an unaligned-load intrinsic, so the stricter alignment
// of `__m256i` is never relied upon.
#[allow(clippy::cast_ptr_alignment)]
#[inline]
unsafe fn load_u256(p: *const u8) -> core::arch::x86_64::__m256i {
    use core::arch::x86_64::{__m256i, _mm256_loadu_si256};

    let src = p.cast::<__m256i>();
    // SAFETY: the caller guarantees 32 readable bytes at `p`.
    unsafe { _mm256_loadu_si256(src) }
}
/// Compares the `n` bytes at `a` with the `n` bytes at `b` using AVX2.
///
/// Whole 32-byte chunks are compared first, then at most one 16-byte chunk,
/// and whatever is left (fewer than 16 bytes) is compared byte by byte.
/// Returns `false` as soon as a differing chunk is found.
///
/// # Safety
///
/// `a` and `b` must each be valid for reading `n` bytes, and the running CPU
/// must support AVX2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn eq_avx2(a: *const u8, b: *const u8, n: usize) -> bool {
    use core::arch::x86_64::{
        _mm256_cmpeq_epi8, _mm256_movemask_epi8, _mm_cmpeq_epi8, _mm_movemask_epi8,
    };

    const WIDE: usize = 32;
    const NARROW: usize = 16;

    // Length of the prefix that consists of whole 32-byte chunks.
    let wide_end = n - n % WIDE;

    let mut cursor = 0usize;
    while cursor < wide_end {
        // SAFETY: `cursor + 32 <= wide_end <= n`, and the caller guarantees
        // `n` readable bytes behind both pointers.
        let (lhs, rhs) = unsafe { (load_u256(a.add(cursor)), load_u256(b.add(cursor))) };
        // All 32 mask bits set (i.e. -1 as i32) means every byte matched.
        if _mm256_movemask_epi8(_mm256_cmpeq_epi8(lhs, rhs)) != -1i32 {
            return false;
        }
        cursor += WIDE;
    }

    // Fewer than 32 bytes remain; take one 16-byte step if it still fits.
    if n - cursor >= NARROW {
        // SAFETY: at least 16 bytes remain past `cursor` in both ranges.
        let (lhs, rhs) = unsafe { (load_u128(a.add(cursor)), load_u128(b.add(cursor))) };
        if _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) != 0xFFFF {
            return false;
        }
        cursor += NARROW;
    }

    // SAFETY: `cursor <= n`, so the remaining `n - cursor` bytes are inside
    // the ranges the caller vouched for.
    unsafe { byte_eq_tail(a.add(cursor), b.add(cursor), n - cursor) }
}
/// Compares the `n` bytes at `a` with the `n` bytes at `b` one byte at a
/// time, stopping at the first difference.
///
/// Returns `true` when `n` is zero.
///
/// # Safety
///
/// `a` and `b` must each be valid for reading `n` bytes.
#[inline]
unsafe fn byte_eq_tail(a: *const u8, b: *const u8, n: usize) -> bool {
    // SAFETY: every index is below `n`, and the caller guarantees `n`
    // readable bytes behind both pointers.
    (0..n).all(|idx| unsafe { a.add(idx).read() == b.add(idx).read() })
}