use core::{cmp, fmt, mem, u16, usize};
use alloc::{string::String, vec, vec::Vec};
use crate::packed::api::MatchKind;
pub type PatternID = u16;
#[derive(Clone, Debug)]
pub struct Patterns {
kind: MatchKind,
by_id: Vec<Vec<u8>>,
order: Vec<PatternID>,
minimum_len: usize,
max_pattern_id: PatternID,
total_pattern_bytes: usize,
}
impl Patterns {
pub fn new() -> Patterns {
Patterns {
kind: MatchKind::default(),
by_id: vec![],
order: vec![],
minimum_len: usize::MAX,
max_pattern_id: 0,
total_pattern_bytes: 0,
}
}
pub fn add(&mut self, bytes: &[u8]) {
assert!(!bytes.is_empty());
assert!(self.by_id.len() <= u16::MAX as usize);
let id = self.by_id.len() as u16;
self.max_pattern_id = id;
self.order.push(id);
self.by_id.push(bytes.to_vec());
self.minimum_len = cmp::min(self.minimum_len, bytes.len());
self.total_pattern_bytes += bytes.len();
}
pub fn set_match_kind(&mut self, kind: MatchKind) {
self.kind = kind;
match self.kind {
MatchKind::LeftmostFirst => {
self.order.sort();
}
MatchKind::LeftmostLongest => {
let (order, by_id) = (&mut self.order, &mut self.by_id);
order.sort_by(|&id1, &id2| {
by_id[id1 as usize]
.len()
.cmp(&by_id[id2 as usize].len())
.reverse()
});
}
}
}
pub fn len(&self) -> usize {
self.by_id.len()
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn memory_usage(&self) -> usize {
self.order.len() * mem::size_of::<PatternID>()
+ self.by_id.len() * mem::size_of::<Vec<u8>>()
+ self.total_pattern_bytes
}
pub fn reset(&mut self) {
self.kind = MatchKind::default();
self.by_id.clear();
self.order.clear();
self.minimum_len = usize::MAX;
self.max_pattern_id = 0;
}
pub fn max_pattern_id(&self) -> PatternID {
assert_eq!((self.max_pattern_id + 1) as usize, self.len());
self.max_pattern_id
}
pub fn minimum_len(&self) -> usize {
self.minimum_len
}
pub fn match_kind(&self) -> &MatchKind {
&self.kind
}
pub fn get(&self, id: PatternID) -> Pattern<'_> {
Pattern(&self.by_id[id as usize])
}
#[cfg(all(feature = "std", target_arch = "x86_64"))]
pub unsafe fn get_unchecked(&self, id: PatternID) -> Pattern<'_> {
Pattern(self.by_id.get_unchecked(id as usize))
}
pub fn iter(&self) -> PatternIter<'_> {
PatternIter { patterns: self, i: 0 }
}
}
#[derive(Debug)]
pub struct PatternIter<'p> {
patterns: &'p Patterns,
i: usize,
}
impl<'p> Iterator for PatternIter<'p> {
type Item = (PatternID, Pattern<'p>);
fn next(&mut self) -> Option<(PatternID, Pattern<'p>)> {
if self.i >= self.patterns.len() {
return None;
}
let id = self.patterns.order[self.i];
let p = self.patterns.get(id);
self.i += 1;
Some((id, p))
}
}
#[derive(Clone)]
pub struct Pattern<'a>(&'a [u8]);
impl<'a> fmt::Debug for Pattern<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("Pattern")
.field("lit", &String::from_utf8_lossy(&self.0))
.finish()
}
}
impl<'p> Pattern<'p> {
pub fn len(&self) -> usize {
self.0.len()
}
pub fn bytes(&self) -> &[u8] {
&self.0
}
#[cfg(all(feature = "std", target_arch = "x86_64"))]
pub fn low_nybbles(&self, len: usize) -> Vec<u8> {
let mut nybs = vec![];
for &b in self.bytes().iter().take(len) {
nybs.push(b & 0xF);
}
nybs
}
#[inline(always)]
pub fn is_prefix(&self, bytes: &[u8]) -> bool {
self.len() <= bytes.len() && self.equals(&bytes[..self.len()])
}
#[inline(always)]
pub fn equals(&self, bytes: &[u8]) -> bool {
let (x, y) = (self.bytes(), bytes);
if x.len() != y.len() {
return false;
}
if x.len() < 4 {
for (&b1, &b2) in x.iter().zip(y) {
if b1 != b2 {
return false;
}
}
return true;
}
unsafe {
let (mut px, mut py) = (x.as_ptr(), y.as_ptr());
let (pxend, pyend) = (px.add(x.len() - 4), py.add(y.len() - 4));
while px < pxend {
let vx = (px as *const u32).read_unaligned();
let vy = (py as *const u32).read_unaligned();
if vx != vy {
return false;
}
px = px.add(4);
py = py.add(4);
}
let vx = (pxend as *const u32).read_unaligned();
let vy = (pyend as *const u32).read_unaligned();
vx == vy
}
}
}