#![cfg_attr(not(feature = "std"), no_std)]
#![cfg_attr(feature = "simd", feature(portable_simd, avx512_target_feature))]
#![allow(incomplete_features)]
#![feature(generic_const_exprs)]
use crate::multiversion::{equal_then_find_second_position_naive, match_naive_directly};
use bitvec::prelude::*;
#[cfg(feature = "simd")]
use {
crate::{
multiversion::simd::{
equal_then_find_second_position_simd_core_simd, match_simd_core, match_simd_select_core,
},
utils::simd::{iterate_haystack_pattern_mask_aligned_simd, SimdBits},
},
core::simd::{LaneCount, SupportedLaneCount},
};
use crate::utils::pad_zeroes_slice_unchecked;
#[derive(Debug, Copy, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[repr(transparent)]
pub struct SignatureMask<const N: usize>(BitArray<[u8; N.div_ceil(u8::BITS as usize)]>)
where
[(); N.div_ceil(u8::BITS as usize)]:;
impl<const N: usize> SignatureMask<N>
where
[(); N.div_ceil(u8::BITS as usize)]:,
{
pub const MAX: Self = Self(BitArray {
data: [u8::MAX; N.div_ceil(u8::BITS as usize)],
..BitArray::ZERO
});
pub const fn all(&self) -> bool {
let mut i = 0;
while i < N {
const BITS: usize = u8::BITS as usize;
let bit = 1 << (i % BITS);
if (self.0.data[i / BITS] & bit) != bit {
return false;
}
i += 1;
}
true
}
}
impl<const N: usize> SignatureMask<N>
where
[(); N.div_ceil(u8::BITS as usize)]:,
{
#[inline(always)]
pub const fn from_bool_array(pattern: [bool; N]) -> Self {
Self::from_bool_slice(&pattern)
}
#[inline(always)]
pub const fn from_bool_slice(pattern: &[bool; N]) -> Self {
Self(Self::from_bool_slice_to_bitarr(pattern))
}
#[inline(always)]
pub const fn from_bool_array_to_bitarr(
pattern: [bool; N],
) -> BitArray<[u8; N.div_ceil(u8::BITS as usize)]> {
Self::from_bool_slice_to_bitarr(&pattern)
}
#[inline(always)]
pub const fn from_bool_slice_to_bitarr(
pattern: &[bool; N],
) -> BitArray<[u8; N.div_ceil(u8::BITS as usize)]> {
let mut arr: BitArray<[u8; N.div_ceil(u8::BITS as usize)]> = BitArray::ZERO;
let mut i = 0;
while i < pattern.len() {
if pattern[i] {
const BITS: usize = u8::BITS as usize;
arr.data[i / BITS] |= 1 << (i % BITS);
}
i += 1;
}
arr
}
#[inline(always)]
pub const fn to_bool_array(&self) -> [bool; N] {
let mut arr: [bool; N] = [false; N];
let mut i = 0;
while i < N {
const BITS: usize = u8::BITS as usize;
let bit = 1 << (i % BITS);
arr[i] = (self.0.data[i / BITS] & bit) == bit;
i += 1;
}
arr
}
}
impl<const N: usize> SignatureMask<N>
where
[(); N.div_ceil(u8::BITS as usize)]:,
{
#[inline(always)]
pub const fn from_byte_array(pattern: [u8; N]) -> Self {
Self::from_byte_slice(&pattern)
}
#[inline(always)]
pub const fn from_byte_slice(pattern: &[u8; N]) -> Self {
match Self::try_from_byte_slice_to_bitarr(pattern) {
Ok(x) => Self(x),
Err(e) => panic!("{}", e),
}
}
#[inline(always)]
pub const fn try_from_byte_array_to_bitarr(
pattern: [u8; N],
) -> Result<BitArray<[u8; N.div_ceil(u8::BITS as usize)]>, &'static str> {
Self::try_from_byte_slice_to_bitarr(&pattern)
}
#[inline(always)]
pub const fn try_from_byte_slice_to_bitarr(
pattern: &[u8; N],
) -> Result<BitArray<[u8; N.div_ceil(u8::BITS as usize)]>, &'static str> {
let mut pattern_bool: [bool; N] = [false; N];
let mut i = 0;
while i < pattern.len() {
pattern_bool[i] = match pattern[i] {
b'x' => true,
b'?' => false,
_ => return Err("unknown character in pattern"),
};
i += 1;
}
Ok(Self::from_bool_slice_to_bitarr(&pattern_bool))
}
#[inline(always)]
pub const fn to_byte_array(&self) -> [u8; N] {
let mut arr: [u8; N] = [b'?'; N];
let mut i = 0;
while i < N {
const BITS: usize = u8::BITS as usize;
let bit = 1 << (i % BITS);
arr[i] = if (self.0.data[i / BITS] & bit) == bit {
b'x'
} else {
b'?'
};
i += 1;
}
arr
}
}
#[derive(Debug, Copy, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[repr(C, align(1))]
pub struct Signature<const N: usize>
where
[(); N.div_ceil(u8::BITS as usize)]:,
{
#[cfg_attr(feature = "serde", serde(with = "serde_big_array::BigArray"))]
pub pattern: [u8; N],
pub mask: SignatureMask<N>,
}
impl<const N: usize> Signature<N>
where
[(); N.div_ceil(u8::BITS as usize)]:,
{
#[inline(always)]
pub const fn from_pattern_mask(pattern: [u8; N], mask: [u8; N]) -> Self {
Self {
pattern,
mask: SignatureMask::from_byte_array(mask),
}
}
#[inline(always)]
pub const fn from_pattern_mask_tuple((pattern, mask): ([u8; N], [u8; N])) -> Self {
Self::from_pattern_mask(pattern, mask)
}
#[inline(always)]
pub const fn from_option_array(needle: [Option<u8>; N]) -> Self {
Self::from_option_slice(&needle)
}
#[inline(always)]
pub const fn from_option_slice(needle: &[Option<u8>; N]) -> Self {
unsafe { Self::from_option_slice_unchecked(needle) }
}
#[inline(always)]
pub const fn from_array_with_exact_match_mask(pattern: [u8; N]) -> Self {
Self {
pattern,
mask: SignatureMask::MAX,
}
}
#[inline(always)]
pub const fn from_slice_with_exact_match_mask(pattern: &[u8; N]) -> Self {
Self::from_array_with_exact_match_mask(*pattern)
}
#[inline(always)]
pub const unsafe fn from_option_slice_unchecked(needle: &[Option<u8>]) -> Self {
let mut pattern: [u8; N] = [0; N];
let mut mask: [bool; N] = [false; N];
let mut i = 0;
while i < needle.len() {
if let Some(x) = needle[i] {
pattern[i] = x;
mask[i] = true;
}
i += 1;
}
Self {
pattern,
mask: SignatureMask::from_bool_array(mask),
}
}
}
impl<const N: usize> Signature<N>
where
[(); N.div_ceil(u8::BITS as usize)]:,
{
#[inline(always)]
pub fn scan<'a>(&self, haystack: &'a [u8]) -> Option<&'a [u8]> {
self.scan_inner(haystack, |chunk| self.match_best_effort(chunk))
}
#[inline(always)]
pub fn scan_naive<'a>(&self, haystack: &'a [u8]) -> Option<&'a [u8]> {
self.scan_inner(haystack, |chunk| {
match_naive_directly(chunk, &self.pattern, &self.mask.0)
})
}
#[inline(always)]
fn scan_inner<'a>(
&self,
mut haystack: &'a [u8],
f: impl Fn(&[u8]) -> bool,
) -> Option<&'a [u8]> {
let exact_match = self.mask.all();
if haystack.len() < N {
if exact_match {
None
} else {
f(unsafe { &pad_zeroes_slice_unchecked::<N>(haystack) }).then_some(haystack)
}
} else {
while !haystack.is_empty() {
let haystack_smaller_than_n = haystack.len() < N;
if exact_match && haystack_smaller_than_n {
return None;
}
let window = if haystack_smaller_than_n {
&unsafe { pad_zeroes_slice_unchecked::<N>(haystack) }
} else {
haystack
};
if f(window) {
return Some(haystack);
}
let move_position = if self.mask.0[0] && !haystack_smaller_than_n {
let first = self.pattern[0];
let potential_position_after_first = {
#[cfg(feature = "simd")]
{
{
if N >= 64 {
equal_then_find_second_position_simd_core_simd::<u64>(
first, window,
)
} else if N >= 32 {
equal_then_find_second_position_simd_core_simd::<u32>(
first, window,
)
} else if N >= 16 {
equal_then_find_second_position_simd_core_simd::<u16>(
first, window,
)
} else if N >= 8 {
equal_then_find_second_position_simd_core_simd::<u8>(
first, window,
)
} else {
equal_then_find_second_position_naive(first, window)
}
}
}
#[cfg(not(feature = "simd"))]
{
equal_then_find_second_position_naive(first, &window)
}
};
potential_position_after_first.unwrap_or(N)
} else {
1
};
haystack = &haystack[move_position..];
}
None
}
}
}
impl<const N: usize> Signature<N>
where
[(); N.div_ceil(u8::BITS as usize)]:,
{
#[inline(always)]
pub fn match_best_effort(&self, chunk: &[u8]) -> bool {
#[cfg(feature = "simd")]
{
if N >= 64 {
self.match_simd::<u64>(chunk)
} else if N >= 32 {
self.match_simd::<u32>(chunk)
} else if N >= 16 {
self.match_simd::<u16>(chunk)
} else if N >= 8 {
self.match_simd::<u8>(chunk)
} else {
self.match_naive(chunk)
}
}
#[cfg(not(feature = "simd"))]
{
self.match_naive(chunk)
}
}
#[inline(always)]
pub fn match_naive(&self, chunk: &[u8]) -> bool {
match_naive_directly(chunk, &self.pattern, &self.mask.0)
}
}
#[cfg(feature = "simd")]
impl<const N: usize> Signature<N>
where
[(); N.div_ceil(u8::BITS as usize)]:,
{
#[inline(always)]
pub fn scan_simd<'a, T: SimdBits>(&self, haystack: &'a [u8]) -> Option<&'a [u8]>
where
LaneCount<{ T::LANES }>: SupportedLaneCount,
{
self.scan_inner(haystack, |chunk| self.match_simd(chunk))
}
#[inline(always)]
pub fn scan_simd_select<'a, T: SimdBits>(&self, haystack: &'a [u8]) -> Option<&'a [u8]>
where
LaneCount<{ T::LANES }>: SupportedLaneCount,
{
self.scan_inner(haystack, |chunk| self.match_simd_select(chunk))
}
#[inline(always)]
pub fn match_simd<T: SimdBits>(&self, chunk: &[u8]) -> bool
where
LaneCount<{ T::LANES }>: SupportedLaneCount,
{
self.match_simd_inner(chunk, |data, pattern, mask: T| {
match_simd_core(data, pattern, mask.to_u64())
})
}
#[inline(always)]
pub fn match_simd_select<T: SimdBits>(&self, chunk: &[u8]) -> bool
where
LaneCount<{ T::LANES }>: SupportedLaneCount,
{
self.match_simd_inner(chunk, |data, pattern, mask: T| {
match_simd_select_core(data, pattern, mask.to_u64())
})
}
#[inline(always)]
pub fn match_simd_inner<T: SimdBits>(
&self,
chunk: &[u8],
f: impl Fn([u8; T::LANES], [u8; T::LANES], T) -> bool,
) -> bool {
iterate_haystack_pattern_mask_aligned_simd(chunk, &self.pattern, &self.mask.0)
.all(|(haystack, pattern, mask)| f(haystack, pattern, mask))
}
}
pub(crate) mod multiversion;
pub(crate) mod utils;