use crate::insn::MAX_CHAR_SET_LENGTH;
use std::fmt;
extern crate memchr;
pub trait ByteSearcher {
fn find_in(&self, rhs: &[u8]) -> Option<usize>;
}
pub trait ByteSeq: ByteSearcher + std::fmt::Debug + Copy + Clone {
const LENGTH: usize;
fn as_slice(&self) -> &[u8];
fn equals_known_len(&self, rhs: &[u8]) -> bool;
}
extern "C" {
fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32;
}
macro_rules! literal_bytes_impl {
($($N:literal)+) => {
$(
impl ByteSeq for [u8; $N] {
const LENGTH: usize = $N;
#[inline(always)]
fn as_slice(&self) -> &[u8] {
self
}
#[inline(always)]
fn equals_known_len(&self, rhs: &[u8]) -> bool {
debug_assert!(rhs.len() == Self::LENGTH, "Slice has wrong length");
if cfg!(feature = "prohibit-unsafe") {
self == rhs
} else {
unsafe { memcmp(self.as_ptr(), rhs.as_ptr(), Self::LENGTH) == 0}
}
}
}
impl ByteSearcher for [u8; $N] {
#[inline(always)]
fn find_in(&self, rhs: &[u8]) -> Option<usize> {
if $N == 1 {
return memchr::memchr(self[0], rhs)
}
for win in rhs.windows(Self::LENGTH) {
if self.equals_known_len(win) {
return Some((win.as_ptr() as usize) - (rhs.as_ptr() as usize));
}
}
None
}
}
)+
}
}
literal_bytes_impl! {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24}
pub trait ByteSet {
fn contains(&self, b: u8) -> bool;
}
#[derive(Copy, Clone, Debug)]
#[repr(align(4))]
pub struct ByteArraySet<ArraySet: SmallArraySet>(pub ArraySet);
impl<ArraySet: SmallArraySet> ByteArraySet<ArraySet> {
#[inline(always)]
pub fn contains(self, b: u8) -> bool {
self.0.contains(b)
}
}
impl<ArraySet: SmallArraySet> ByteSearcher for ByteArraySet<ArraySet> {
#[inline(always)]
fn find_in(&self, rhs: &[u8]) -> Option<usize> {
self.0.find_in(rhs)
}
}
pub trait SmallArraySet: Copy {
fn contains(self, b: u8) -> bool;
fn find_in(self, rhs: &[u8]) -> Option<usize>;
}
impl SmallArraySet for [u8; 2] {
#[inline(always)]
fn contains(self, b: u8) -> bool {
b == self[0] || b == self[1]
}
#[inline(always)]
fn find_in(self, rhs: &[u8]) -> Option<usize> {
memchr::memchr2(self[0], self[1], rhs)
}
}
impl SmallArraySet for [u8; 3] {
#[inline(always)]
fn contains(self, b: u8) -> bool {
b == self[0] || b == self[1] || b == self[2]
}
#[inline(always)]
fn find_in(self, rhs: &[u8]) -> Option<usize> {
memchr::memchr3(self[0], self[1], self[2], rhs)
}
}
impl SmallArraySet for [u8; 4] {
#[inline(always)]
fn contains(self, b: u8) -> bool {
b == self[0] || b == self[1] || b == self[2] || b == self[3]
}
#[inline(always)]
fn find_in(self, rhs: &[u8]) -> Option<usize> {
for (idx, byte) in rhs.iter().enumerate() {
if self.contains(*byte) {
return Some(idx);
}
}
None
}
}
#[allow(unused_parens)]
#[inline(always)]
pub fn charset_contains(set: &[char; MAX_CHAR_SET_LENGTH], c: char) -> bool {
let mut result = false;
for &v in set.iter() {
result |= (v == c);
}
result
}
fn format_bitmap<Func>(name: &str, f: &mut fmt::Formatter<'_>, contains: Func) -> fmt::Result
where
Func: Fn(u8) -> bool,
{
write!(f, "{}[", name)?;
let mut idx = 0;
let mut maybe_space = "";
while idx <= 256 {
let mut end = idx;
while end <= 256 && contains(end as u8) {
end += 1;
}
match end - idx {
0 => (),
1 => write!(f, "{}{}", maybe_space, idx)?,
_ => write!(f, "{}{}-{}", maybe_space, idx, end - 1)?,
};
if end > idx {
maybe_space = " ";
}
idx = end + 1
}
write!(f, "]")?;
Ok(())
}
#[derive(Default, Copy, Clone)]
#[repr(align(4))]
pub struct AsciiBitmap(pub [u8; 16]);
impl AsciiBitmap {
#[inline(always)]
pub fn set(&mut self, val: u8) {
debug_assert!(val <= 127, "Value should be ASCII");
self.0[(val >> 3) as usize] |= 1 << (val & 0x7);
}
}
impl fmt::Debug for AsciiBitmap {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
format_bitmap("AsciiBitmap", f, |v| self.contains(v))
}
}
impl ByteSet for AsciiBitmap {
#[inline(always)]
fn contains(&self, val: u8) -> bool {
let byte = (val & 0x7F) >> 3;
let bit = val & 0x7;
let mask = ((val >> 7) ^ 1) << bit;
(self.0[byte as usize] & mask) != 0
}
}
#[derive(Default, Copy, Clone)]
#[repr(align(4))]
pub struct ByteBitmap([u16; 16]);
impl ByteBitmap {
pub fn new(bytes: &[u8]) -> ByteBitmap {
let mut bb = ByteBitmap::default();
for &b in bytes {
bb.set(b)
}
bb
}
#[inline(always)]
pub fn contains(&self, val: u8) -> bool {
let byte = val >> 4;
let bit = val & 0xF;
(self.0[byte as usize] & (1 << bit)) != 0
}
#[inline(always)]
pub fn set(&mut self, val: u8) {
let byte = val >> 4;
let bit = val & 0xF;
self.0[byte as usize] |= 1 << bit;
}
pub fn bitor(&mut self, rhs: &ByteBitmap) {
for idx in 0..self.0.len() {
self.0[idx] |= rhs.0[idx];
}
}
pub fn bitnot(&mut self) -> &mut Self {
for val in self.0.iter_mut() {
*val = !*val;
}
self
}
pub fn count_bits(&self) -> u32 {
self.0.iter().map(|v| v.count_ones()).sum()
}
#[allow(clippy::wrong_self_convention)]
pub fn to_vec(&self) -> Vec<u8> {
(0..=255).filter(|b| self.contains(*b)).collect()
}
#[inline(always)]
fn unsafe_find_in_slice(&self, bytes: &[u8]) -> Option<usize> {
type Chunk = u32;
let bm = &self.0;
let mut offset = 0;
let (prefix, body, suffix) = unsafe { bytes.align_to::<Chunk>() };
for &byte in prefix.iter() {
if self.contains(byte) {
return Some(offset);
}
offset += 1;
}
for &chunk in body {
let byte_idxs = ((chunk >> 4) & 0x0F0F0F0F).to_le_bytes();
let bit_idxs = (chunk & 0x0F0F0F0F).to_le_bytes();
if (bm[byte_idxs[0] as usize] & (1 << bit_idxs[0])) != 0 {
return Some(offset);
}
if (bm[byte_idxs[1] as usize] & (1 << bit_idxs[1])) != 0 {
return Some(offset + 1);
}
if (bm[byte_idxs[2] as usize] & (1 << bit_idxs[2])) != 0 {
return Some(offset + 2);
}
if (bm[byte_idxs[3] as usize] & (1 << bit_idxs[3])) != 0 {
return Some(offset + 3);
}
offset += 4;
}
for &byte in suffix.iter() {
if self.contains(byte) {
return Some(offset);
}
offset += 1;
}
None
}
}
impl ByteSearcher for ByteBitmap {
#[inline(always)]
fn find_in(&self, bytes: &[u8]) -> Option<usize> {
if cfg!(feature = "prohibit-unsafe") {
for (idx, byte) in bytes.iter().enumerate() {
if self.contains(*byte) {
return Some(idx);
}
}
None
} else {
self.unsafe_find_in_slice(bytes)
}
}
}
impl fmt::Debug for ByteBitmap {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
format_bitmap("ByteBitmap", f, |v| self.contains(v))
}
}
#[derive(Debug, Copy, Clone)]
pub struct EmptyString {}
impl ByteSearcher for EmptyString {
#[inline(always)]
fn find_in(&self, _bytes: &[u8]) -> Option<usize> {
Some(0)
}
}
#[cfg(test)]
mod tests {
use super::*;
fn make_bitmap(bytes: &[u8]) -> ByteBitmap {
let mut bm = ByteBitmap::default();
for &b in bytes {
bm.set(b)
}
bm
}
#[test]
fn empty_search() {
assert_eq!(EmptyString {}.find_in(&[1, 2, 3]), Some(0));
assert_eq!(EmptyString {}.find_in(&[]), Some(0));
}
#[test]
fn bitmap_search() {
assert_eq!(make_bitmap(&[]).find_in(&[1, 2, 3]), None);
assert_eq!(make_bitmap(&[]).bitnot().find_in(&[1, 2, 3]), Some(0));
assert_eq!(make_bitmap(&[1]).bitnot().find_in(&[1, 2, 3]), Some(1));
assert_eq!(make_bitmap(&[2]).bitnot().find_in(&[1, 2, 3]), Some(0));
assert_eq!(
make_bitmap(&[4, 5, 6, 7]).find_in(&[8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]),
None
);
assert_eq!(
make_bitmap(&[4, 5, 6, 7]).find_in(&[8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]),
None
);
assert_eq!(
make_bitmap(&[4, 5, 6, 7])
.find_in(&[8, 9, 10, 11, 12, 13, 4, 14, 6, 15, 7, 16, 17, 18, 19, 20]),
Some(6)
);
}
#[test]
fn literal_search() {
assert_eq!([0, 1, 2, 3].find_in(&[4, 5, 6, 7]), None);
assert_eq!([0, 1, 2, 3].find_in(&[]), None);
}
}