use super::rule::{RuleInfo, SatisfactionMethod};
pub(super) const DIRECT_RULE_BIT: u32 = 1 << 31;
pub(super) const BITMASK_CAPACITY: usize = 64;
pub(super) const PROCESS_TYPE_TABLE_SIZE: usize = 128;
const KIND_SHIFT: u32 = 30;
const PT_SHIFT: u32 = 27;
const PT_MASK: u32 = 0x07 << PT_SHIFT;
const BOUNDARY_SHIFT: u32 = 25;
const BOUNDARY_MASK: u32 = 0x03 << BOUNDARY_SHIFT;
const OFFSET_SHIFT: u32 = 19;
const OFFSET_MASK: u32 = 0x3F << OFFSET_SHIFT;
const RULE_IDX_MASK: u32 = (1 << OFFSET_SHIFT) - 1;
#[inline(always)]
pub(super) fn encode_direct(
pt_index: u8,
boundary: u8,
kind: PatternKind,
offset: u8,
rule_idx: u32,
) -> Option<u32> {
if pt_index >= 8 || offset >= 64 || rule_idx > RULE_IDX_MASK {
return None;
}
let kind_bit = match kind {
PatternKind::And => 0,
PatternKind::Not => 1u32 << KIND_SHIFT,
};
Some(
DIRECT_RULE_BIT
| kind_bit
| ((pt_index as u32) << PT_SHIFT)
| ((boundary as u32) << BOUNDARY_SHIFT)
| ((offset as u32) << OFFSET_SHIFT)
| rule_idx,
)
}
#[inline(always)]
pub(super) fn decode_direct(raw: u32) -> (u8, u8, PatternKind, usize, usize) {
let pt_index = ((raw & PT_MASK) >> PT_SHIFT) as u8;
let boundary = ((raw & BOUNDARY_MASK) >> BOUNDARY_SHIFT) as u8;
let kind = if (raw >> KIND_SHIFT) & 1 == 0 {
PatternKind::And
} else {
PatternKind::Not
};
let offset = ((raw & OFFSET_MASK) >> OFFSET_SHIFT) as usize;
let rule_idx = (raw & RULE_IDX_MASK) as usize;
(pt_index, boundary, kind, offset, rule_idx)
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub(super) enum PatternKind {
And = 0,
Not = 1,
}
#[derive(Debug, Clone)]
pub(super) struct PatternEntry {
pub(super) rule_idx: u32,
pub(super) offset: u8,
pub(super) pt_index: u8,
pub(super) kind: PatternKind,
pub(super) boundary: u8,
}
#[derive(Clone)]
pub(super) struct PatternIndex {
entries: Vec<PatternEntry>,
ranges: Vec<(usize, usize)>,
has_boundary: bool,
}
pub(super) enum PatternDispatch<'a> {
SingleEntry(&'a PatternEntry),
Entries(&'a [PatternEntry]),
}
impl PatternIndex {
pub(super) fn new(dedup_entries: Vec<Vec<PatternEntry>>) -> Self {
let mut entries = Vec::with_capacity(dedup_entries.iter().map(|bucket| bucket.len()).sum());
let mut ranges = Vec::with_capacity(dedup_entries.len());
for bucket in dedup_entries {
let start = entries.len();
let len = bucket.len();
entries.extend(bucket);
ranges.push((start, len));
}
let has_boundary = entries.iter().any(|e| e.boundary != 0);
Self {
entries,
ranges,
has_boundary,
}
}
pub(super) fn heap_bytes(&self) -> usize {
self.entries.capacity() * size_of::<PatternEntry>()
+ self.ranges.capacity() * size_of::<(usize, usize)>()
}
pub(super) fn has_boundary(&self) -> bool {
self.has_boundary
}
#[inline(always)]
pub(super) fn all_single_and(&self, rule_info: &[RuleInfo]) -> bool {
self.ranges.iter().all(|&(_, len)| len == 1)
&& self.entries.iter().all(|e| {
let info = rule_info[e.rule_idx as usize];
info.method == SatisfactionMethod::Immediate && !info.has_not
})
}
pub(super) fn build_value_map(&self, rule_info: &[RuleInfo]) -> Vec<u32> {
let mut value_map = Vec::with_capacity(self.ranges.len());
for (dedup_idx, &(start, len)) in self.ranges.iter().enumerate() {
if len == 1 {
let entry = unsafe { self.entries.get_unchecked(start) };
if let Some(encoded) = Self::try_encode_direct(entry, rule_info) {
value_map.push(encoded);
continue;
}
}
value_map.push(dedup_idx as u32);
}
value_map
}
fn try_encode_direct(entry: &PatternEntry, rule_info: &[RuleInfo]) -> Option<u32> {
if rule_info[entry.rule_idx as usize].method.use_matrix() {
return None;
}
encode_direct(
entry.pt_index,
entry.boundary,
entry.kind,
entry.offset,
entry.rule_idx,
)
}
#[inline(always)]
pub(super) fn dispatch_indirect(&self, raw_value: u32) -> PatternDispatch<'_> {
let pattern_idx = raw_value as usize;
let (start, len) = unsafe {
core::hint::assert_unchecked(raw_value & DIRECT_RULE_BIT == 0);
core::hint::assert_unchecked(pattern_idx < self.ranges.len());
*self.ranges.get_unchecked(pattern_idx)
};
unsafe { core::hint::assert_unchecked(start + len <= self.entries.len()) };
if len == 1 {
PatternDispatch::SingleEntry(unsafe { self.entries.get_unchecked(start) })
} else {
PatternDispatch::Entries(unsafe { self.entries.get_unchecked(start..start + len) })
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn encoding_roundtrip() {
let cases: &[(u8, u8, PatternKind, u8, u32)] = &[
(3, 1, PatternKind::And, 0, 12345),
(1, 0, PatternKind::Not, 5, 500),
(2, 0, PatternKind::And, 31, 42),
(7, 3, PatternKind::Not, 63, RULE_IDX_MASK),
];
for &(pt, bd, kind, off, idx) in cases {
let raw = encode_direct(pt, bd, kind, off, idx).unwrap();
assert!(raw & DIRECT_RULE_BIT != 0);
let (dpt, dbd, dkind, doff, didx) = decode_direct(raw);
assert_eq!(
(dpt, dbd, dkind, doff, didx),
(pt, bd, kind, off as usize, idx as usize)
);
}
}
#[test]
fn encoding_overflow_returns_none() {
assert!(encode_direct(8, 0, PatternKind::And, 0, 0).is_none());
assert!(encode_direct(0, 0, PatternKind::And, 64, 0).is_none());
assert!(encode_direct(0, 0, PatternKind::And, 0, RULE_IDX_MASK + 1).is_none());
}
fn entry(
rule_idx: u32,
offset: u8,
pt_index: u8,
kind: PatternKind,
boundary: u8,
) -> PatternEntry {
PatternEntry {
rule_idx,
offset,
pt_index,
kind,
boundary,
}
}
fn immediate_info() -> RuleInfo {
RuleInfo {
and_count: 1,
method: SatisfactionMethod::Immediate,
has_not: false,
}
}
fn bitmask_info(and_count: u8) -> RuleInfo {
RuleInfo {
and_count,
method: SatisfactionMethod::Bitmask,
has_not: false,
}
}
fn matrix_info(and_count: u8) -> RuleInfo {
RuleInfo {
and_count,
method: SatisfactionMethod::Matrix,
has_not: false,
}
}
fn ri_for(rule_idx: u32, info: RuleInfo) -> Vec<RuleInfo> {
let mut ri = vec![immediate_info(); rule_idx as usize + 1];
ri[rule_idx as usize] = info;
ri
}
type EncodingCase = (
(u32, u8, u8, PatternKind, u8),
RuleInfo,
(u8, PatternKind, usize),
);
#[test]
fn test_direct_encoding_variants() {
let cases: &[EncodingCase] = &[
(
(5, 0, 2, PatternKind::And, 1),
immediate_info(),
(2, PatternKind::And, 5),
),
(
(100, 0, 3, PatternKind::And, 0),
RuleInfo {
and_count: 1,
method: SatisfactionMethod::Immediate,
has_not: true,
},
(3, PatternKind::And, 100),
),
(
(42, 1, 2, PatternKind::And, 0),
bitmask_info(3),
(2, PatternKind::And, 42),
),
(
(50, 1, 0, PatternKind::Not, 0),
RuleInfo {
and_count: 1,
method: SatisfactionMethod::Immediate,
has_not: true,
},
(0, PatternKind::Not, 50),
),
];
for &(ref e, ref ri, (exp_pt, exp_kind, exp_idx)) in cases {
let entries = vec![vec![entry(e.0, e.1, e.2, e.3, e.4)]];
let ri_vec = ri_for(e.0, *ri);
let raw = PatternIndex::new(entries).build_value_map(&ri_vec)[0];
assert!(raw & DIRECT_RULE_BIT != 0, "should use direct encoding");
let (pt, _, kind, _, idx) = decode_direct(raw);
assert_eq!((pt, kind, idx), (exp_pt, exp_kind, exp_idx));
}
}
#[test]
fn test_matrix_always_falls_back() {
let entries = vec![vec![entry(0, 0, 0, PatternKind::And, 0)]];
let ri = [matrix_info(2)];
assert!(PatternIndex::new(entries).build_value_map(&ri)[0] & DIRECT_RULE_BIT == 0);
}
}