use std::sync::atomic::{AtomicU64, Ordering};
use bytes::Bytes;
use memchr::memmem::Finder;
use regex::bytes::Regex;
use crate::split::{extract_header_value, PayloadParts};
/// Process-wide counter used by `rand_1_in_n`: it is incremented once per
/// sampled call (when `n > 1`) and set back to zero by `reset_rand_counter`.
static RAND_COUNTER: AtomicU64 = AtomicU64::new(0);
/// A filter compiled to bytecode together with the constant tables the
/// bytecode refers to by index.
#[derive(Debug)]
pub struct CompiledFilter {
/// Instruction stream interpreted by `evaluate` / `evaluate_debug`.
bytecode: Box<[u8]>,
/// Substring searchers, one per entry of `strings` and in the same order.
searchers: Box<[Finder<'static>]>,
/// Literal byte strings referenced by `u16` operands in the bytecode.
strings: Box<[Box<[u8]>]>,
/// Compiled regexes referenced by `u16` operands in the bytecode.
regexes: Box<[Regex]>,
/// String sets; every element of a set is an index into `strings`.
string_sets: Box<[Box<[u16]>]>,
/// Delimiter bytes; their length is handed to `PayloadParts::ensure`.
delimiter: Box<[u8]>,
/// Searcher for `delimiter`, handed to `PayloadParts::ensure`.
delimiter_finder: Finder<'static>,
/// Filter source text, returned by `source()` and printed in debug traces.
source: Box<str>,
}
impl CompiledFilter {
/// Builds a filter from compiled bytecode and its constant tables.
///
/// * `bytecode` - instruction stream executed by `evaluate`.
/// * `strings` - literal byte strings addressed by `u16` operands; one
///   substring searcher is prepared per entry, in the same order.
/// * `regexes` - compiled regexes addressed by `u16` operands.
/// * `string_sets` - sets of indices into `strings`.
/// * `delimiter` - byte sequence used to split a payload into parts.
/// * `source` - filter source text, kept for diagnostics.
///
/// No validation is performed here: indices and operands in `bytecode` are
/// only checked (by slice bounds checks) when the filter is evaluated.
pub fn new(
    bytecode: Vec<u8>,
    strings: Vec<Vec<u8>>,
    regexes: Vec<Regex>,
    string_sets: Vec<Vec<u16>>,
    delimiter: Vec<u8>,
    source: String,
) -> Self {
    // `into_owned` makes each searcher own a copy of its needle, which yields
    // a `Finder<'static>` without leaking the needle for the process lifetime.
    let searchers: Box<[Finder<'static>]> = strings
        .iter()
        .map(|needle| Finder::new(needle.as_slice()).into_owned())
        .collect();
    let delimiter_finder = Finder::new(delimiter.as_slice()).into_owned();

    Self {
        bytecode: bytecode.into_boxed_slice(),
        searchers,
        strings: strings.into_iter().map(Vec::into_boxed_slice).collect(),
        regexes: regexes.into_boxed_slice(),
        string_sets: string_sets
            .into_iter()
            .map(Vec::into_boxed_slice)
            .collect(),
        delimiter: delimiter.into_boxed_slice(),
        delimiter_finder,
        source: source.into_boxed_str(),
    }
}
/// Executes the compiled bytecode against `payload` and returns the verdict.
///
/// The program runs on a fixed 32-slot boolean stack:
///
/// * predicate opcodes push one result;
/// * `0x30` / `0x31` / `0x32` combine results (AND / OR / NOT);
/// * `0x70` / `0x71` are conditional jumps relative to the jump opcode: when
///   taken the tested value stays on the stack, otherwise it is popped and
///   execution continues with the next instruction;
/// * `0xFF` returns the top of the stack.
///
/// Multi-byte operands are little-endian. Part opcodes carry a one-byte part
/// index followed by their table indices; the part is materialised with
/// `PayloadParts::ensure` before it is read.
///
/// An unknown opcode panics in debug builds and yields `false` in release
/// builds.
///
/// # Panics
///
/// Panics on malformed bytecode (program counter, operand, or table index out
/// of range, stack overflow or underflow).
#[inline]
pub fn evaluate(&self, payload: Bytes) -> bool {
    let mut parts = PayloadParts::new_lazy(payload);
    let delim_len = self.delimiter.len();
    let mut stack = [false; 32];
    let mut sp: usize = 0;
    let mut pc: usize = 0;
    loop {
        debug_assert!(pc < self.bytecode.len(), "PC out of bounds");
        // A completely full stack is legal as long as the next instruction
        // does not push; an actual overflow is caught by the bounds check on
        // `stack[sp]` in every build profile.
        debug_assert!(sp <= stack.len(), "Stack overflow");
        match self.bytecode[pc] {
            // PushTrue
            0x01 => {
                stack[sp] = true;
                sp += 1;
                pc += 1;
            }
            // PushFalse
            0x02 => {
                stack[sp] = false;
                sp += 1;
                pc += 1;
            }
            // Contains: whole payload contains strings[idx].
            // The payload is re-borrowed per instruction so that the borrow
            // never overlaps the mutable `parts.ensure` calls below.
            0x10 => {
                let idx = read_u16(&self.bytecode, pc + 1) as usize;
                let whole = parts.payload();
                let haystack: &[u8] = whole.as_ref();
                stack[sp] = self.searchers[idx].find(haystack).is_some();
                sp += 1;
                pc += 3;
            }
            // StartsWith
            0x11 => {
                let idx = read_u16(&self.bytecode, pc + 1) as usize;
                let whole = parts.payload();
                let haystack: &[u8] = whole.as_ref();
                stack[sp] = haystack.starts_with(&self.strings[idx]);
                sp += 1;
                pc += 3;
            }
            // EndsWith
            0x12 => {
                let idx = read_u16(&self.bytecode, pc + 1) as usize;
                let whole = parts.payload();
                let haystack: &[u8] = whole.as_ref();
                stack[sp] = haystack.ends_with(&self.strings[idx]);
                sp += 1;
                pc += 3;
            }
            // Equals
            0x13 => {
                let idx = read_u16(&self.bytecode, pc + 1) as usize;
                let whole = parts.payload();
                let haystack: &[u8] = whole.as_ref();
                stack[sp] = haystack == &self.strings[idx][..];
                sp += 1;
                pc += 3;
            }
            // Matches: whole payload matches regexes[idx].
            0x20 => {
                let idx = read_u16(&self.bytecode, pc + 1) as usize;
                let whole = parts.payload();
                let haystack: &[u8] = whole.as_ref();
                stack[sp] = self.regexes[idx].is_match(haystack);
                sp += 1;
                pc += 3;
            }
            // And
            0x30 => {
                debug_assert!(sp >= 2, "Stack underflow on AND");
                sp -= 1;
                stack[sp - 1] = stack[sp - 1] && stack[sp];
                pc += 1;
            }
            // Or
            0x31 => {
                debug_assert!(sp >= 2, "Stack underflow on OR");
                sp -= 1;
                stack[sp - 1] = stack[sp - 1] || stack[sp];
                pc += 1;
            }
            // Not
            0x32 => {
                debug_assert!(sp >= 1, "Stack underflow on NOT");
                stack[sp - 1] = !stack[sp - 1];
                pc += 1;
            }
            // PartContains
            0x40 => {
                let part_idx = self.bytecode[pc + 1] as usize;
                let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
                parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                let part = parts.get(part_idx);
                stack[sp] = self.searchers[str_idx].find(part).is_some();
                sp += 1;
                pc += 4;
            }
            // PartStartsWith
            0x41 => {
                let part_idx = self.bytecode[pc + 1] as usize;
                let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
                parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                let part = parts.get(part_idx);
                stack[sp] = part.starts_with(&self.strings[str_idx]);
                sp += 1;
                pc += 4;
            }
            // PartEndsWith
            0x42 => {
                let part_idx = self.bytecode[pc + 1] as usize;
                let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
                parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                let part = parts.get(part_idx);
                stack[sp] = part.ends_with(&self.strings[str_idx]);
                sp += 1;
                pc += 4;
            }
            // PartEquals
            0x43 => {
                let part_idx = self.bytecode[pc + 1] as usize;
                let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
                parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                let part = parts.get(part_idx);
                stack[sp] = part == &self.strings[str_idx][..];
                sp += 1;
                pc += 4;
            }
            // PartMatches
            0x44 => {
                let part_idx = self.bytecode[pc + 1] as usize;
                let regex_idx = read_u16(&self.bytecode, pc + 2) as usize;
                parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                let part = parts.get(part_idx);
                stack[sp] = self.regexes[regex_idx].is_match(part);
                sp += 1;
                pc += 4;
            }
            // PartIsEmpty
            0x45 => {
                let part_idx = self.bytecode[pc + 1] as usize;
                parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                stack[sp] = parts.get(part_idx).is_empty();
                sp += 1;
                pc += 2;
            }
            // PartNotEmpty
            0x46 => {
                let part_idx = self.bytecode[pc + 1] as usize;
                parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                stack[sp] = !parts.get(part_idx).is_empty();
                sp += 1;
                pc += 2;
            }
            // PartInSet: part equals any string listed in string_sets[set_idx].
            0x47 => {
                let part_idx = self.bytecode[pc + 1] as usize;
                let set_idx = read_u16(&self.bytecode, pc + 2) as usize;
                parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                let part = parts.get(part_idx);
                let set = &self.string_sets[set_idx];
                stack[sp] = set
                    .iter()
                    .any(|&str_idx| part == &self.strings[str_idx as usize][..]);
                sp += 1;
                pc += 4;
            }
            // PartIEquals (ASCII case-insensitive)
            0x48 => {
                let part_idx = self.bytecode[pc + 1] as usize;
                let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
                parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                let part = parts.get(part_idx);
                stack[sp] = part.eq_ignore_ascii_case(&self.strings[str_idx]);
                sp += 1;
                pc += 4;
            }
            // PartIContains (ASCII case-insensitive)
            0x49 => {
                let part_idx = self.bytecode[pc + 1] as usize;
                let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
                parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                let part = parts.get(part_idx);
                let needle = &self.strings[str_idx];
                stack[sp] = icontains(part, needle);
                sp += 1;
                pc += 4;
            }
            // HeaderEquals: a missing header evaluates to false.
            0x50 => {
                let part_idx = self.bytecode[pc + 1] as usize;
                let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
                let val_idx = read_u16(&self.bytecode, pc + 4) as usize;
                parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                let headers = parts.get(part_idx);
                let header_name = &self.strings[hdr_idx];
                let expected = &self.strings[val_idx];
                stack[sp] = extract_header_value(headers, header_name)
                    .map(|v| v == &expected[..])
                    .unwrap_or(false);
                sp += 1;
                pc += 6;
            }
            // HeaderIEquals (value compared ASCII case-insensitively)
            0x51 => {
                let part_idx = self.bytecode[pc + 1] as usize;
                let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
                let val_idx = read_u16(&self.bytecode, pc + 4) as usize;
                parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                let headers = parts.get(part_idx);
                let header_name = &self.strings[hdr_idx];
                let expected = &self.strings[val_idx];
                stack[sp] = extract_header_value(headers, header_name)
                    .map(|v| v.eq_ignore_ascii_case(expected))
                    .unwrap_or(false);
                sp += 1;
                pc += 6;
            }
            // HeaderContains
            0x52 => {
                let part_idx = self.bytecode[pc + 1] as usize;
                let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
                let val_idx = read_u16(&self.bytecode, pc + 4) as usize;
                parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                let headers = parts.get(part_idx);
                let header_name = &self.strings[hdr_idx];
                stack[sp] = extract_header_value(headers, header_name)
                    .map(|v| self.searchers[val_idx].find(v).is_some())
                    .unwrap_or(false);
                sp += 1;
                pc += 6;
            }
            // HeaderExists
            0x53 => {
                let part_idx = self.bytecode[pc + 1] as usize;
                let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
                parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                let headers = parts.get(part_idx);
                let header_name = &self.strings[hdr_idx];
                stack[sp] = extract_header_value(headers, header_name).is_some();
                sp += 1;
                pc += 4;
            }
            // JumpIfFalse: on a false top-of-stack, jump and keep the value;
            // otherwise pop it and fall through.
            0x70 => {
                debug_assert!(sp >= 1, "Stack underflow on JumpIfFalse");
                if !stack[sp - 1] {
                    let offset = read_i16(&self.bytecode, pc + 1);
                    pc = (pc as isize + offset as isize) as usize;
                } else {
                    sp -= 1;
                    pc += 3;
                }
            }
            // JumpIfTrue: mirror image of JumpIfFalse.
            0x71 => {
                debug_assert!(sp >= 1, "Stack underflow on JumpIfTrue");
                if stack[sp - 1] {
                    let offset = read_i16(&self.bytecode, pc + 1);
                    pc = (pc as isize + offset as isize) as usize;
                } else {
                    sp -= 1;
                    pc += 3;
                }
            }
            // Rand: counter-based 1-in-n sampling.
            0x60 => {
                let n = read_u16(&self.bytecode, pc + 1);
                stack[sp] = rand_1_in_n(n);
                sp += 1;
                pc += 3;
            }
            // Return the top of the stack.
            0xFF => {
                debug_assert!(sp >= 1, "Stack underflow on RETURN");
                return stack[sp - 1];
            }
            _ => {
                #[cfg(debug_assertions)]
                panic!("Unknown opcode: 0x{:02X} at pc={}", self.bytecode[pc], pc);
                #[cfg(not(debug_assertions))]
                return false;
            }
        }
    }
}
/// Evaluates the filter like `evaluate`, additionally recording one trace
/// line per executed instruction.
///
/// The trace is written to stderr only when the filter result is `true`;
/// for a `false` result nothing is printed. An unknown opcode panics in
/// debug builds and yields `false` (without a trace) in release builds.
///
/// # Panics
///
/// Panics on malformed bytecode (program counter, operand, or table index out
/// of range, stack overflow or underflow).
pub fn evaluate_debug(&self, payload: Bytes) -> bool {
    let mut parts = PayloadParts::new_lazy(payload);
    let delim_len = self.delimiter.len();
    let mut stack = [false; 32];
    let mut sp: usize = 0;
    let mut pc: usize = 0;
    let mut trace_lines: Vec<String> = Vec::new();
    loop {
        debug_assert!(pc < self.bytecode.len(), "PC out of bounds");
        // A completely full stack is legal as long as the next instruction
        // does not push; a real overflow is caught by the `stack[sp]` bounds check.
        debug_assert!(sp <= stack.len(), "Stack overflow");
        match self.bytecode[pc] {
            0x01 => {
                stack[sp] = true;
                trace_lines.push(format!(" pc={pc:3} PushTrue → stack[{sp}]=true"));
                sp += 1;
                pc += 1;
            }
            0x02 => {
                stack[sp] = false;
                trace_lines.push(format!(" pc={pc:3} PushFalse → stack[{sp}]=false"));
                sp += 1;
                pc += 1;
            }
            // Whole-payload predicates re-borrow the payload per instruction so
            // the borrow never overlaps the mutable `parts.ensure` calls.
            0x10 => {
                let idx = read_u16(&self.bytecode, pc + 1) as usize;
                let whole = parts.payload();
                let haystack: &[u8] = whole.as_ref();
                let result = self.searchers[idx].find(haystack).is_some();
                stack[sp] = result;
                trace_lines.push(format!(
                    " pc={pc:3} Contains str[{idx}]={:?} → {result}",
                    String::from_utf8_lossy(&self.strings[idx])
                ));
                sp += 1;
                pc += 3;
            }
            0x11 => {
                let idx = read_u16(&self.bytecode, pc + 1) as usize;
                let whole = parts.payload();
                let haystack: &[u8] = whole.as_ref();
                let result = haystack.starts_with(&self.strings[idx]);
                stack[sp] = result;
                trace_lines.push(format!(
                    " pc={pc:3} StartsWith str[{idx}]={:?} → {result}",
                    String::from_utf8_lossy(&self.strings[idx])
                ));
                sp += 1;
                pc += 3;
            }
            0x12 => {
                let idx = read_u16(&self.bytecode, pc + 1) as usize;
                let whole = parts.payload();
                let haystack: &[u8] = whole.as_ref();
                let result = haystack.ends_with(&self.strings[idx]);
                stack[sp] = result;
                trace_lines.push(format!(
                    " pc={pc:3} EndsWith str[{idx}]={:?} → {result}",
                    String::from_utf8_lossy(&self.strings[idx])
                ));
                sp += 1;
                pc += 3;
            }
            0x13 => {
                let idx = read_u16(&self.bytecode, pc + 1) as usize;
                let whole = parts.payload();
                let haystack: &[u8] = whole.as_ref();
                let result = haystack == &self.strings[idx][..];
                stack[sp] = result;
                trace_lines.push(format!(
                    " pc={pc:3} Equals str[{idx}]={:?} → {result}",
                    String::from_utf8_lossy(&self.strings[idx])
                ));
                sp += 1;
                pc += 3;
            }
            0x20 => {
                let idx = read_u16(&self.bytecode, pc + 1) as usize;
                let whole = parts.payload();
                let haystack: &[u8] = whole.as_ref();
                let result = self.regexes[idx].is_match(haystack);
                stack[sp] = result;
                trace_lines.push(format!(" pc={pc:3} Matches regex[{idx}] → {result}"));
                sp += 1;
                pc += 3;
            }
            0x30 => {
                debug_assert!(sp >= 2, "Stack underflow on AND");
                sp -= 1;
                let result = stack[sp - 1] && stack[sp];
                stack[sp - 1] = result;
                trace_lines.push(format!(" pc={pc:3} And → {result}"));
                pc += 1;
            }
            0x31 => {
                debug_assert!(sp >= 2, "Stack underflow on OR");
                sp -= 1;
                let result = stack[sp - 1] || stack[sp];
                stack[sp - 1] = result;
                trace_lines.push(format!(" pc={pc:3} Or → {result}"));
                pc += 1;
            }
            0x32 => {
                debug_assert!(sp >= 1, "Stack underflow on NOT");
                stack[sp - 1] = !stack[sp - 1];
                trace_lines.push(format!(" pc={pc:3} Not → {}", stack[sp - 1]));
                pc += 1;
            }
            0x40 => {
                let part_idx = self.bytecode[pc + 1] as usize;
                let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
                parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                let part = parts.get(part_idx);
                let result = self.searchers[str_idx].find(part).is_some();
                stack[sp] = result;
                trace_lines.push(format!(
                    " pc={pc:3} PartContains part[{part_idx}]={:?} str[{str_idx}]={:?} → {result}",
                    String::from_utf8_lossy(part),
                    String::from_utf8_lossy(&self.strings[str_idx])
                ));
                sp += 1;
                pc += 4;
            }
            0x41 => {
                let part_idx = self.bytecode[pc + 1] as usize;
                let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
                parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                let part = parts.get(part_idx);
                let result = part.starts_with(&self.strings[str_idx]);
                stack[sp] = result;
                trace_lines.push(format!(
                    " pc={pc:3} PartStartsWith part[{part_idx}]={:?} str[{str_idx}]={:?} → {result}",
                    String::from_utf8_lossy(part),
                    String::from_utf8_lossy(&self.strings[str_idx])
                ));
                sp += 1;
                pc += 4;
            }
            0x42 => {
                let part_idx = self.bytecode[pc + 1] as usize;
                let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
                parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                let part = parts.get(part_idx);
                let result = part.ends_with(&self.strings[str_idx]);
                stack[sp] = result;
                trace_lines.push(format!(
                    " pc={pc:3} PartEndsWith part[{part_idx}]={:?} str[{str_idx}]={:?} → {result}",
                    String::from_utf8_lossy(part),
                    String::from_utf8_lossy(&self.strings[str_idx])
                ));
                sp += 1;
                pc += 4;
            }
            0x43 => {
                let part_idx = self.bytecode[pc + 1] as usize;
                let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
                parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                let part = parts.get(part_idx);
                let result = part == &self.strings[str_idx][..];
                stack[sp] = result;
                trace_lines.push(format!(
                    " pc={pc:3} PartEquals part[{part_idx}]={:?} str[{str_idx}]={:?} → {result}",
                    String::from_utf8_lossy(part),
                    String::from_utf8_lossy(&self.strings[str_idx])
                ));
                sp += 1;
                pc += 4;
            }
            0x44 => {
                let part_idx = self.bytecode[pc + 1] as usize;
                let regex_idx = read_u16(&self.bytecode, pc + 2) as usize;
                parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                let part = parts.get(part_idx);
                let result = self.regexes[regex_idx].is_match(part);
                stack[sp] = result;
                trace_lines.push(format!(
                    " pc={pc:3} PartMatches part[{part_idx}]={:?} regex[{regex_idx}] → {result}",
                    String::from_utf8_lossy(part)
                ));
                sp += 1;
                pc += 4;
            }
            0x45 => {
                let part_idx = self.bytecode[pc + 1] as usize;
                parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                let result = parts.get(part_idx).is_empty();
                stack[sp] = result;
                trace_lines.push(format!(" pc={pc:3} PartIsEmpty part[{part_idx}] → {result}"));
                sp += 1;
                pc += 2;
            }
            0x46 => {
                let part_idx = self.bytecode[pc + 1] as usize;
                parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                let result = !parts.get(part_idx).is_empty();
                stack[sp] = result;
                trace_lines.push(format!(" pc={pc:3} PartNotEmpty part[{part_idx}] → {result}"));
                sp += 1;
                pc += 2;
            }
            0x47 => {
                let part_idx = self.bytecode[pc + 1] as usize;
                let set_idx = read_u16(&self.bytecode, pc + 2) as usize;
                parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                let part = parts.get(part_idx);
                let set = &self.string_sets[set_idx];
                let result = set.iter().any(|&si| part == &self.strings[si as usize][..]);
                stack[sp] = result;
                trace_lines.push(format!(
                    " pc={pc:3} PartInSet part[{part_idx}]={:?} set[{set_idx}] → {result}",
                    String::from_utf8_lossy(part)
                ));
                sp += 1;
                pc += 4;
            }
            0x48 => {
                let part_idx = self.bytecode[pc + 1] as usize;
                let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
                parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                let part = parts.get(part_idx);
                let result = part.eq_ignore_ascii_case(&self.strings[str_idx]);
                stack[sp] = result;
                trace_lines.push(format!(
                    " pc={pc:3} PartIEquals part[{part_idx}]={:?} str[{str_idx}]={:?} → {result}",
                    String::from_utf8_lossy(part),
                    String::from_utf8_lossy(&self.strings[str_idx])
                ));
                sp += 1;
                pc += 4;
            }
            0x49 => {
                let part_idx = self.bytecode[pc + 1] as usize;
                let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
                parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                let part = parts.get(part_idx);
                let needle = &self.strings[str_idx];
                let result = icontains(part, needle);
                stack[sp] = result;
                trace_lines.push(format!(
                    " pc={pc:3} PartIContains part[{part_idx}]={:?} str[{str_idx}]={:?} → {result}",
                    String::from_utf8_lossy(part),
                    String::from_utf8_lossy(&self.strings[str_idx])
                ));
                sp += 1;
                pc += 4;
            }
            0x50 => {
                let part_idx = self.bytecode[pc + 1] as usize;
                let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
                let val_idx = read_u16(&self.bytecode, pc + 4) as usize;
                parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                let headers = parts.get(part_idx);
                let header_name = &self.strings[hdr_idx];
                let expected = &self.strings[val_idx];
                let extracted = extract_header_value(headers, header_name);
                let result = extracted.map(|v| v == &expected[..]).unwrap_or(false);
                stack[sp] = result;
                trace_lines.push(format!(
                    " pc={pc:3} HeaderEquals part[{part_idx}] hdr={:?} expected={:?} got={:?} → {result}",
                    String::from_utf8_lossy(header_name),
                    String::from_utf8_lossy(expected),
                    extracted.map(|v| String::from_utf8_lossy(v).to_string())
                ));
                sp += 1;
                pc += 6;
            }
            0x51 => {
                let part_idx = self.bytecode[pc + 1] as usize;
                let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
                let val_idx = read_u16(&self.bytecode, pc + 4) as usize;
                parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                let headers = parts.get(part_idx);
                let header_name = &self.strings[hdr_idx];
                let expected = &self.strings[val_idx];
                let extracted = extract_header_value(headers, header_name);
                let result = extracted.map(|v| v.eq_ignore_ascii_case(expected)).unwrap_or(false);
                stack[sp] = result;
                trace_lines.push(format!(
                    " pc={pc:3} HeaderIEquals part[{part_idx}] hdr={:?} expected={:?} got={:?} → {result}",
                    String::from_utf8_lossy(header_name),
                    String::from_utf8_lossy(expected),
                    extracted.map(|v| String::from_utf8_lossy(v).to_string())
                ));
                sp += 1;
                pc += 6;
            }
            0x52 => {
                let part_idx = self.bytecode[pc + 1] as usize;
                let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
                let val_idx = read_u16(&self.bytecode, pc + 4) as usize;
                parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                let headers = parts.get(part_idx);
                let header_name = &self.strings[hdr_idx];
                let extracted = extract_header_value(headers, header_name);
                let result = extracted.map(|v| self.searchers[val_idx].find(v).is_some()).unwrap_or(false);
                stack[sp] = result;
                trace_lines.push(format!(
                    " pc={pc:3} HeaderContains part[{part_idx}] hdr={:?} needle={:?} got={:?} → {result}",
                    String::from_utf8_lossy(header_name),
                    String::from_utf8_lossy(&self.strings[val_idx]),
                    extracted.map(|v| String::from_utf8_lossy(v).to_string())
                ));
                sp += 1;
                pc += 6;
            }
            0x53 => {
                let part_idx = self.bytecode[pc + 1] as usize;
                let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
                parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                let headers = parts.get(part_idx);
                let header_name = &self.strings[hdr_idx];
                let result = extract_header_value(headers, header_name).is_some();
                stack[sp] = result;
                trace_lines.push(format!(
                    " pc={pc:3} HeaderExists part[{part_idx}] hdr={:?} → {result}",
                    String::from_utf8_lossy(header_name)
                ));
                sp += 1;
                pc += 4;
            }
            // Conditional jumps keep the tested value on the stack when taken
            // and pop it when falling through.
            0x70 => {
                debug_assert!(sp >= 1, "Stack underflow on JumpIfFalse");
                if !stack[sp - 1] {
                    let offset = read_i16(&self.bytecode, pc + 1);
                    trace_lines.push(format!(" pc={pc:3} JumpIfFalse → false, jump by {offset}"));
                    pc = (pc as isize + offset as isize) as usize;
                } else {
                    trace_lines.push(format!(" pc={pc:3} JumpIfFalse → true, pop & continue"));
                    sp -= 1;
                    pc += 3;
                }
            }
            0x71 => {
                debug_assert!(sp >= 1, "Stack underflow on JumpIfTrue");
                if stack[sp - 1] {
                    let offset = read_i16(&self.bytecode, pc + 1);
                    trace_lines.push(format!(" pc={pc:3} JumpIfTrue → true, jump by {offset}"));
                    pc = (pc as isize + offset as isize) as usize;
                } else {
                    trace_lines.push(format!(" pc={pc:3} JumpIfTrue → false, pop & continue"));
                    sp -= 1;
                    pc += 3;
                }
            }
            0x60 => {
                let n = read_u16(&self.bytecode, pc + 1);
                let result = rand_1_in_n(n);
                stack[sp] = result;
                trace_lines.push(format!(" pc={pc:3} Rand(1/{n}) → {result}"));
                sp += 1;
                pc += 3;
            }
            0xFF => {
                debug_assert!(sp >= 1, "Stack underflow on RETURN");
                let result = stack[sp - 1];
                // Only matching payloads are traced; non-matches stay silent.
                if result {
                    eprintln!("=== FILTER DEBUG (result=true) filter={:?} ===", self.source);
                    for line in &trace_lines {
                        eprintln!("{line}");
                    }
                    eprintln!("=== END FILTER DEBUG ===");
                }
                return result;
            }
            _ => {
                #[cfg(debug_assertions)]
                panic!("Unknown opcode: 0x{:02X} at pc={}", self.bytecode[pc], pc);
                #[cfg(not(debug_assertions))]
                return false;
            }
        }
    }
}
/// Returns the filter source text this filter was constructed with.
pub fn source(&self) -> &str {
    let text: &str = &*self.source;
    text
}
/// Returns the length of the compiled bytecode in bytes.
pub fn bytecode_len(&self) -> usize {
    let program: &[u8] = &self.bytecode;
    program.len()
}
/// Returns the number of entries in the string table.
pub fn string_count(&self) -> usize {
    let table: &[Box<[u8]>] = &self.strings;
    table.len()
}
pub fn regex_count(&self) -> usize {
self.regexes.len()
}
/// Returns the delimiter bytes this filter was constructed with.
pub fn delimiter(&self) -> &[u8] {
    &self.delimiter[..]
}
}
/// Decodes the little-endian `u16` stored at `bytecode[offset..offset + 2]`.
///
/// Panics if either byte lies outside `bytecode`.
#[inline(always)]
fn read_u16(bytecode: &[u8], offset: usize) -> u16 {
    let low = u16::from(bytecode[offset]);
    let high = u16::from(bytecode[offset + 1]);
    (high << 8) | low
}
/// Decodes the little-endian `i16` stored at `bytecode[offset..offset + 2]`.
///
/// Panics if either byte lies outside `bytecode`.
#[inline(always)]
fn read_i16(bytecode: &[u8], offset: usize) -> i16 {
    let low = u16::from(bytecode[offset]);
    let high = u16::from(bytecode[offset + 1]);
    // Same-width cast: reinterprets the 16 bits as two's complement.
    ((high << 8) | low) as i16
}
/// Reports whether `needle` occurs in `haystack`, comparing bytes ASCII
/// case-insensitively. An empty `needle` is contained in every haystack.
#[inline]
fn icontains(haystack: &[u8], needle: &[u8]) -> bool {
    // `windows(0)` panics, so the empty needle has to be answered first. A
    // haystack shorter than the needle simply yields no windows.
    needle.is_empty()
        || haystack
            .windows(needle.len())
            .any(|candidate| candidate.eq_ignore_ascii_case(needle))
}
/// Counter-based 1-in-`n` sampling.
///
/// For `n <= 1` the answer is always `true` and the shared counter is left
/// untouched. Otherwise the counter is advanced by one and the call returns
/// `true` when the value observed before the increment is a multiple of `n`.
#[inline]
fn rand_1_in_n(n: u16) -> bool {
    match n {
        0 | 1 => true,
        _ => {
            let ticket = RAND_COUNTER.fetch_add(1, Ordering::Relaxed);
            ticket.is_multiple_of(u64::from(n))
        }
    }
}
/// Sets the shared sampling counter back to zero.
pub fn reset_rand_counter() {
    const INITIAL: u64 = 0;
    RAND_COUNTER.store(INITIAL, Ordering::Relaxed);
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::{Mutex, MutexGuard, PoisonError};

    /// Serializes the tests that touch the process-wide sampling counter.
    /// The test harness runs tests on several threads, so without this a
    /// reset issued by one test could land in the middle of another test's
    /// exact-sequence assertion.
    static RAND_COUNTER_LOCK: Mutex<()> = Mutex::new(());

    /// Takes the counter lock, ignoring poisoning left by a failed test.
    fn lock_rand_counter() -> MutexGuard<'static, ()> {
        RAND_COUNTER_LOCK
            .lock()
            .unwrap_or_else(PoisonError::into_inner)
    }

    /// Builds `<opcode> <str_idx:u16> RETURN` over a one-entry string table.
    fn make_simple_filter(opcode: u8, str_idx: u16, needle: &str) -> CompiledFilter {
        let mut bytecode = vec![opcode];
        bytecode.extend_from_slice(&str_idx.to_le_bytes());
        bytecode.push(0xFF);
        CompiledFilter::new(
            bytecode,
            vec![needle.as_bytes().to_vec()],
            vec![],
            vec![],
            b";;;".to_vec(),
            "test filter".to_string(),
        )
    }

    /// Builds a filter that uses no string, regex, or set tables.
    fn make_table_free_filter(bytecode: Vec<u8>, source: &str) -> CompiledFilter {
        CompiledFilter::new(
            bytecode,
            vec![],
            vec![],
            vec![],
            b";;;".to_vec(),
            source.into(),
        )
    }

    #[test]
    fn test_contains() {
        let filter = make_simple_filter(0x10, 0, "hello");
        assert!(filter.evaluate(Bytes::from("say hello world")));
        assert!(!filter.evaluate(Bytes::from("say goodbye")));
    }

    #[test]
    fn test_starts_with() {
        let filter = make_simple_filter(0x11, 0, "hello");
        assert!(filter.evaluate(Bytes::from("hello world")));
        assert!(!filter.evaluate(Bytes::from("say hello")));
    }

    #[test]
    fn test_ends_with() {
        let filter = make_simple_filter(0x12, 0, "world");
        assert!(filter.evaluate(Bytes::from("hello world")));
        assert!(!filter.evaluate(Bytes::from("world hello")));
    }

    #[test]
    fn test_equals() {
        let filter = make_simple_filter(0x13, 0, "hello");
        assert!(filter.evaluate(Bytes::from("hello")));
        assert!(!filter.evaluate(Bytes::from("hello world")));
    }

    #[test]
    fn test_push_true() {
        let filter = make_table_free_filter(vec![0x01, 0xFF], "true");
        assert!(filter.evaluate(Bytes::from("anything")));
    }

    #[test]
    fn test_push_false() {
        let filter = make_table_free_filter(vec![0x02, 0xFF], "false");
        assert!(!filter.evaluate(Bytes::from("anything")));
    }

    #[test]
    fn test_and() {
        let filter = make_table_free_filter(vec![0x01, 0x01, 0x30, 0xFF], "true AND true");
        assert!(filter.evaluate(Bytes::from("")));
        let filter = make_table_free_filter(vec![0x01, 0x02, 0x30, 0xFF], "true AND false");
        assert!(!filter.evaluate(Bytes::from("")));
    }

    #[test]
    fn test_or() {
        let filter = make_table_free_filter(vec![0x02, 0x01, 0x31, 0xFF], "false OR true");
        assert!(filter.evaluate(Bytes::from("")));
        let filter = make_table_free_filter(vec![0x02, 0x02, 0x31, 0xFF], "false OR false");
        assert!(!filter.evaluate(Bytes::from("")));
    }

    #[test]
    fn test_not() {
        let filter = make_table_free_filter(vec![0x01, 0x32, 0xFF], "NOT true");
        assert!(!filter.evaluate(Bytes::from("")));
        let filter = make_table_free_filter(vec![0x02, 0x32, 0xFF], "NOT false");
        assert!(filter.evaluate(Bytes::from("")));
    }

    #[test]
    fn test_jump_if_false_short_circuits() {
        // A; JumpIfFalse +4 -> RETURN; B; RETURN
        let skip = make_table_free_filter(vec![0x02, 0x70, 0x04, 0x00, 0x01, 0xFF], "false && true");
        assert!(!skip.evaluate(Bytes::from("")));
        let fall_true = make_table_free_filter(vec![0x01, 0x70, 0x04, 0x00, 0x01, 0xFF], "true && true");
        assert!(fall_true.evaluate(Bytes::from("")));
        let fall_false = make_table_free_filter(vec![0x01, 0x70, 0x04, 0x00, 0x02, 0xFF], "true && false");
        assert!(!fall_false.evaluate(Bytes::from("")));
    }

    #[test]
    fn test_jump_if_true_short_circuits() {
        // A; JumpIfTrue +4 -> RETURN; B; RETURN
        let skip = make_table_free_filter(vec![0x01, 0x71, 0x04, 0x00, 0x02, 0xFF], "true || false");
        assert!(skip.evaluate(Bytes::from("")));
        let fall_true = make_table_free_filter(vec![0x02, 0x71, 0x04, 0x00, 0x01, 0xFF], "false || true");
        assert!(fall_true.evaluate(Bytes::from("")));
        let fall_false = make_table_free_filter(vec![0x02, 0x71, 0x04, 0x00, 0x02, 0xFF], "false || false");
        assert!(!fall_false.evaluate(Bytes::from("")));
    }

    #[test]
    fn test_part_equals() {
        let filter = CompiledFilter::new(
            vec![0x43, 0x01, 0x00, 0x00, 0xFF],
            vec![b"2".to_vec()],
            vec![],
            vec![],
            b";;;".to_vec(),
            "field[1] == \"2\"".into(),
        );
        assert!(filter.evaluate(Bytes::from("v1;;;2;;;subtype")));
        assert!(!filter.evaluate(Bytes::from("v1;;;1;;;subtype")));
    }

    #[test]
    fn test_part_in_set() {
        let filter = CompiledFilter::new(
            vec![0x47, 0x01, 0x00, 0x00, 0xFF],
            vec![b"1".to_vec(), b"2".to_vec(), b"3".to_vec()],
            vec![],
            vec![vec![0, 1, 2]],
            b";;;".to_vec(),
            "field[1] in {\"1\", \"2\", \"3\"}".into(),
        );
        assert!(filter.evaluate(Bytes::from("v1;;;1;;;sub")));
        assert!(filter.evaluate(Bytes::from("v1;;;2;;;sub")));
        assert!(filter.evaluate(Bytes::from("v1;;;3;;;sub")));
        assert!(!filter.evaluate(Bytes::from("v1;;;4;;;sub")));
    }

    #[test]
    fn test_rand() {
        // Hold the lock for the whole sequence: the expectation below depends
        // on nobody else resetting or advancing the shared counter meanwhile.
        let _guard = lock_rand_counter();
        reset_rand_counter();
        let filter = make_table_free_filter(vec![0x60, 0x02, 0x00, 0xFF], "rand(2)");
        let results: Vec<bool> = (0..10).map(|_| filter.evaluate(Bytes::from(""))).collect();
        assert_eq!(
            results,
            vec![true, false, true, false, true, false, true, false, true, false]
        );
    }

    #[test]
    fn test_rand_always_true() {
        let _guard = lock_rand_counter();
        reset_rand_counter();
        let filter = make_table_free_filter(vec![0x60, 0x01, 0x00, 0xFF], "rand(1)");
        for _ in 0..10 {
            assert!(filter.evaluate(Bytes::from("")));
        }
    }

    #[test]
    fn test_regex_match() {
        let filter = CompiledFilter::new(
            vec![0x20, 0x00, 0x00, 0xFF],
            vec![],
            vec![Regex::new(r"error_[0-9]+").unwrap()],
            vec![],
            b";;;".to_vec(),
            "payload matches \"error_[0-9]+\"".into(),
        );
        assert!(filter.evaluate(Bytes::from("found error_123 in log")));
        assert!(filter.evaluate(Bytes::from("error_0")));
        assert!(!filter.evaluate(Bytes::from("error_abc")));
        assert!(!filter.evaluate(Bytes::from("no errors")));
    }

    #[test]
    fn test_header_iequals() {
        let filter = CompiledFilter::new(
            vec![0x51, 0x00, 0x00, 0x00, 0x01, 0x00, 0xFF],
            vec![b"x-custom".to_vec(), b"expected".to_vec()],
            vec![],
            vec![],
            b";;;".to_vec(),
            "headers.header(\"x-custom\") iequals \"expected\"".into(),
        );
        assert!(filter.evaluate(Bytes::from("X-Custom: expected\r\n")));
        assert!(filter.evaluate(Bytes::from("x-custom: EXPECTED\r\n")));
        assert!(filter.evaluate(Bytes::from("X-CUSTOM: Expected\r\n")));
        assert!(!filter.evaluate(Bytes::from("X-Custom: other\r\n")));
        assert!(!filter.evaluate(Bytes::from("X-Other: expected\r\n")));
    }

    #[test]
    fn test_complex_multi_clause_filter() {
        let filter = CompiledFilter::new(
            vec![
                0x43, 0x01, 0x00, 0x00, // field[1] == strings[0]
                0x43, 0x02, 0x01, 0x00, // field[2] == strings[1]
                0x30, // AND
                0x51, 0x04, 0x02, 0x00, 0x03, 0x00, // field[4] header strings[2] iequals strings[3]
                0x30, // AND
                0xFF, // RETURN
            ],
            vec![
                b"error".to_vec(),
                b"500".to_vec(),
                b"content-type".to_vec(),
                b"application/json".to_vec(),
            ],
            vec![],
            vec![],
            b";;;".to_vec(),
            "multi-clause filter".into(),
        );
        let mut fields: Vec<&str> = vec![""; 6];
        fields[1] = "error";
        fields[2] = "500";
        fields[4] = "Content-Type: application/json\r\n";
        let payload = fields.join(";;;");
        assert!(filter.evaluate(Bytes::from(payload)));
        fields[1] = "info";
        let payload = fields.join(";;;");
        assert!(!filter.evaluate(Bytes::from(payload)));
        fields[1] = "error";
        fields[2] = "200";
        let payload = fields.join(";;;");
        assert!(!filter.evaluate(Bytes::from(payload)));
        fields[2] = "500";
        fields[4] = "Content-Type: text/html\r\n";
        let payload = fields.join(";;;");
        assert!(!filter.evaluate(Bytes::from(payload)));
    }

    #[test]
    fn test_icontains_helper() {
        assert!(icontains(b"Hello World", b"WORLD"));
        assert!(icontains(b"abc", b""));
        assert!(icontains(b"", b""));
        assert!(!icontains(b"ab", b"abc"));
        assert!(!icontains(b"hello", b"xyz"));
    }

    #[test]
    fn test_operand_readers() {
        assert_eq!(read_u16(&[0x34, 0x12], 0), 0x1234);
        assert_eq!(read_u16(&[0x00, 0xFF, 0xFF], 1), u16::MAX);
        assert_eq!(read_i16(&[0xFC, 0xFF], 0), -4);
        assert_eq!(read_i16(&[0x04, 0x00], 0), 4);
    }
}