use crate::insn::MAX_CHAR_SET_LENGTH;
use core::fmt;
extern crate memchr;
#[cfg(not(feature = "std"))]
use alloc::vec::Vec;
pub trait ByteSearcher {
fn find_in(&self, rhs: &[u8]) -> Option<usize>;
}
impl ByteSearcher for [u8; 1] {
#[inline(always)]
fn find_in(&self, rhs: &[u8]) -> Option<usize> {
memchr::memchr(self[0], rhs)
}
}
impl ByteSearcher for [u8; 2] {
#[inline(always)]
fn find_in(&self, rhs: &[u8]) -> Option<usize> {
memchr::memchr2(self[0], self[1], rhs)
}
}
impl ByteSearcher for [u8; 3] {
#[inline(always)]
fn find_in(&self, rhs: &[u8]) -> Option<usize> {
memchr::memchr3(self[0], self[1], self[2], rhs)
}
}
impl ByteSearcher for memchr::memmem::Finder<'_> {
fn find_in(&self, rhs: &[u8]) -> Option<usize> {
self.find(rhs)
}
}
pub trait ByteSet {
fn contains(&self, b: u8) -> bool;
}
#[derive(Copy, Clone, Debug)]
#[repr(align(4))]
pub struct ByteArraySet<ArraySet: SmallArraySet>(pub ArraySet);
impl<ArraySet: SmallArraySet> ByteArraySet<ArraySet> {
#[inline(always)]
pub fn contains(self, b: u8) -> bool {
self.0.contains(b)
}
}
impl<ArraySet: SmallArraySet> ByteSearcher for ByteArraySet<ArraySet> {
#[inline(always)]
fn find_in(&self, rhs: &[u8]) -> Option<usize> {
self.0.find_in(rhs)
}
}
pub trait SmallArraySet: Copy {
fn contains(self, b: u8) -> bool;
fn find_in(self, rhs: &[u8]) -> Option<usize>;
}
impl SmallArraySet for [u8; 2] {
#[inline(always)]
fn contains(self, b: u8) -> bool {
b == self[0] || b == self[1]
}
#[inline(always)]
fn find_in(self, rhs: &[u8]) -> Option<usize> {
memchr::memchr2(self[0], self[1], rhs)
}
}
impl SmallArraySet for [u8; 3] {
#[inline(always)]
fn contains(self, b: u8) -> bool {
b == self[0] || b == self[1] || b == self[2]
}
#[inline(always)]
fn find_in(self, rhs: &[u8]) -> Option<usize> {
memchr::memchr3(self[0], self[1], self[2], rhs)
}
}
impl SmallArraySet for [u8; 4] {
#[inline(always)]
fn contains(self, b: u8) -> bool {
b == self[0] || b == self[1] || b == self[2] || b == self[3]
}
#[inline(always)]
fn find_in(self, rhs: &[u8]) -> Option<usize> {
for (idx, byte) in rhs.iter().enumerate() {
if self.contains(*byte) {
return Some(idx);
}
}
None
}
}
#[allow(unused_parens)]
#[inline(always)]
pub fn charset_contains(set: &[u32; MAX_CHAR_SET_LENGTH], c: u32) -> bool {
let mut result = false;
for &v in set.iter() {
result |= (v == c);
}
result
}
fn format_bitmap<Func>(name: &str, f: &mut fmt::Formatter<'_>, contains: Func) -> fmt::Result
where
Func: Fn(u8) -> bool,
{
write!(f, "{}[", name)?;
let mut idx = 0;
let mut maybe_space = "";
while idx <= 256 {
let mut end = idx;
while end <= 256 && contains(end as u8) {
end += 1;
}
match end - idx {
0 => (),
1 => write!(f, "{}{}", maybe_space, idx)?,
_ => write!(f, "{}{}-{}", maybe_space, idx, end - 1)?,
};
if end > idx {
maybe_space = " ";
}
idx = end + 1
}
write!(f, "]")?;
Ok(())
}
#[derive(Default, Copy, Clone)]
#[repr(align(4))]
pub struct AsciiBitmap(pub [u8; 16]);
impl AsciiBitmap {
#[inline(always)]
pub fn set(&mut self, val: u8) {
debug_assert!(val <= 127, "Value should be ASCII");
self.0[(val >> 3) as usize] |= 1 << (val & 0x7);
}
}
impl fmt::Debug for AsciiBitmap {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
format_bitmap("AsciiBitmap", f, |v| self.contains(v))
}
}
impl ByteSet for AsciiBitmap {
#[inline(always)]
fn contains(&self, val: u8) -> bool {
let byte = (val & 0x7F) >> 3;
let bit = val & 0x7;
let mask = ((val >> 7) ^ 1) << bit;
(self.0[byte as usize] & mask) != 0
}
}
#[derive(Default, Copy, Clone, PartialEq, Eq)]
#[repr(align(4))]
pub struct ByteBitmap([u16; 16]);
impl ByteBitmap {
pub fn new(bytes: &[u8]) -> ByteBitmap {
let mut bb = ByteBitmap::default();
for &b in bytes {
bb.set(b)
}
bb
}
#[inline(always)]
pub fn contains(&self, val: u8) -> bool {
let byte = val >> 4;
let bit = val & 0xF;
(self.0[byte as usize] & (1 << bit)) != 0
}
#[inline(always)]
pub fn set(&mut self, val: u8) {
let byte = val >> 4;
let bit = val & 0xF;
self.0[byte as usize] |= 1 << bit;
}
pub fn bitor(&mut self, rhs: &ByteBitmap) {
for idx in 0..self.0.len() {
self.0[idx] |= rhs.0[idx];
}
}
pub fn bitnot(&mut self) -> &mut Self {
for val in self.0.iter_mut() {
*val = !*val;
}
self
}
pub fn count_bits(&self) -> u32 {
self.0.iter().map(|v| v.count_ones()).sum()
}
#[allow(clippy::wrong_self_convention)]
#[inline(always)]
pub fn as_array<const N: usize>(&self) -> [u8; N] {
let mut array = [0u8; N];
let mut idx = 0;
for byte in 0..=255 {
if self.contains(byte) {
array[idx] = byte;
idx += 1;
}
}
array
}
#[inline(always)]
fn unsafe_find_in_slice(&self, bytes: &[u8]) -> Option<usize> {
type Chunk = u32;
let bm = &self.0;
let mut offset = 0;
let (prefix, body, suffix) = unsafe { bytes.align_to::<Chunk>() };
for &byte in prefix.iter() {
if self.contains(byte) {
return Some(offset);
}
offset += 1;
}
for &chunk in body {
let byte_idxs = ((chunk >> 4) & 0x0F0F0F0F).to_le_bytes();
let bit_idxs = (chunk & 0x0F0F0F0F).to_le_bytes();
if (bm[byte_idxs[0] as usize] & (1 << bit_idxs[0])) != 0 {
return Some(offset);
}
if (bm[byte_idxs[1] as usize] & (1 << bit_idxs[1])) != 0 {
return Some(offset + 1);
}
if (bm[byte_idxs[2] as usize] & (1 << bit_idxs[2])) != 0 {
return Some(offset + 2);
}
if (bm[byte_idxs[3] as usize] & (1 << bit_idxs[3])) != 0 {
return Some(offset + 3);
}
offset += 4;
}
for &byte in suffix.iter() {
if self.contains(byte) {
return Some(offset);
}
offset += 1;
}
None
}
}
impl ByteSearcher for ByteBitmap {
#[inline(always)]
fn find_in(&self, bytes: &[u8]) -> Option<usize> {
if cfg!(feature = "prohibit-unsafe") {
for (idx, byte) in bytes.iter().enumerate() {
if self.contains(*byte) {
return Some(idx);
}
}
None
} else {
self.unsafe_find_in_slice(bytes)
}
}
}
impl fmt::Debug for ByteBitmap {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
format_bitmap("ByteBitmap", f, |v| self.contains(v))
}
}
#[derive(Debug, Copy, Clone)]
pub struct EmptyString {}
impl ByteSearcher for EmptyString {
#[inline(always)]
fn find_in(&self, _bytes: &[u8]) -> Option<usize> {
Some(0)
}
}
#[cfg(test)]
mod tests {
use super::*;
fn make_bitmap(bytes: &[u8]) -> ByteBitmap {
let mut bm = ByteBitmap::default();
for &b in bytes {
bm.set(b)
}
bm
}
#[test]
fn empty_search() {
assert_eq!(EmptyString {}.find_in(&[1, 2, 3]), Some(0));
assert_eq!(EmptyString {}.find_in(&[]), Some(0));
}
#[test]
fn bitmap_search() {
assert_eq!(make_bitmap(&[]).find_in(&[1, 2, 3]), None);
assert_eq!(make_bitmap(&[]).bitnot().find_in(&[1, 2, 3]), Some(0));
assert_eq!(make_bitmap(&[1]).bitnot().find_in(&[1, 2, 3]), Some(1));
assert_eq!(make_bitmap(&[2]).bitnot().find_in(&[1, 2, 3]), Some(0));
assert_eq!(
make_bitmap(&[4, 5, 6, 7]).find_in(&[8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]),
None
);
assert_eq!(
make_bitmap(&[4, 5, 6, 7]).find_in(&[8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]),
None
);
assert_eq!(
make_bitmap(&[4, 5, 6, 7])
.find_in(&[8, 9, 10, 11, 12, 13, 4, 14, 6, 15, 7, 16, 17, 18, 19, 20]),
Some(6)
);
}
#[test]
fn literal_search() {
assert_eq!([0, 1, 2, 3].find_in(&[4, 5, 6, 7]), None);
assert_eq!([0, 1, 2, 3].find_in(&[]), None);
}
}