use std::cell::{Cell, RefCell};
use std::rc::Rc;
use std::sync::Arc;
use rustc_hash::{FxHashMap, FxHashSet};
use super::arena::{
NfaBuffers as ArenaNfaBuffers, StateArena, StateId, insert_string, insert_suffix,
make_anything_but_arena_fa, make_cidr_arena_fa, make_monocase_arena_fa,
make_numeric_greater_arena_fa, make_numeric_less_arena_fa, make_numeric_range_arena_fa,
make_prefix_arena_fa, make_shellstyle_arena_fa, make_string_arena_fa, make_suffix_dfa,
make_wildcard_arena_fa, merge_arena_nfas, traverse_arena_dfa, traverse_arena_dfa_backward,
traverse_arena_nfa,
};
use super::small_table::{FieldMatcher, NfaBuffers};
use crate::regexp::make_regexp_nfa_arena;
/// Returns a copy of `val` enclosed in a pair of ASCII double quotes.
#[inline]
fn quote_wrap(val: &[u8]) -> Vec<u8> {
    const QUOTE: &[u8] = b"\"";
    [QUOTE, val, QUOTE].concat()
}
/// One lookaround condition of a multi-condition pattern, compiled into its
/// own arena automaton.
#[derive(Clone, Debug)]
pub struct ConditionNfa {
/// Arena holding the states of the condition automaton.
pub arena: StateArena,
/// Start state of the automaton inside `arena`.
pub start: StateId,
/// When `true`, the condition passes only if the automaton does NOT match
/// the value; when `false`, it passes only if it does.
pub is_negative: bool,
}
/// A multi-condition (lookaround) pattern: a primary automaton plus the
/// list of condition automata that must all pass.
#[derive(Clone)]
pub struct MultiConditionNfa {
/// Automaton built from the primary pattern.
// NOTE(review): in the code visible here this arena is budget-checked and
// stored, but not traversed at match time — confirm that is intended.
pub primary_arena: StateArena,
/// Start state of `primary_arena`.
pub primary_start: StateId,
/// Raw pointer used purely as a lookup key into the owning value matcher's
/// `transition_map`; it is never dereferenced in this file.
pub field_matcher_ptr: *const FieldMatcher,
/// Conditions that must all pass for the pattern to fire.
pub conditions: Vec<ConditionNfa>,
}
/// Builds the pattern used to evaluate a lookbehind condition by prefixing the
/// primary pattern with the lookbehind pattern.
///
/// * If `lookbehind` has no branches, a clone of `primary` is returned.
/// * Otherwise, if `primary` has no branches, a clone of `lookbehind` is returned.
/// * Otherwise every lookbehind branch is concatenated with every primary
///   branch, iterating lookbehind branches in the outer position.
fn build_lookbehind_combined_pattern(
    lookbehind: &crate::regexp::RegexpRoot,
    primary: &crate::regexp::RegexpRoot,
) -> crate::regexp::RegexpRoot {
    use crate::regexp::RegexpBranch;
    match (lookbehind.is_empty(), primary.is_empty()) {
        (true, _) => primary.clone(),
        (false, true) => lookbehind.clone(),
        (false, false) => lookbehind
            .iter()
            .flat_map(|prefix| {
                primary.iter().map(move |tail| {
                    // Cross product entry: prefix branch followed by primary branch.
                    let mut joined: RegexpBranch = prefix.clone();
                    joined.extend(tail.clone());
                    joined
                })
            })
            .collect(),
    }
}
/// A state of the field-level automaton. Interior mutability (`RefCell`) lets
/// patterns be added through a shared reference.
// NOTE(review): `#[derive(Default)]` adds an `X: Default` bound that `new()`
// does not need.
#[derive(Default)]
pub struct MutableFieldMatcher<X: Clone + Eq + std::hash::Hash> {
/// Value matchers keyed by field path.
pub transitions: RefCell<FxHashMap<String, Rc<MutableValueMatcher<X>>>>,
/// Pattern identifiers reported when this state is reached.
pub matches: RefCell<Vec<X>>,
/// Successor states keyed by path, followed when an event field with that
/// path is encountered.
pub exists_true: RefCell<FxHashMap<String, Rc<Self>>>,
/// Successor states keyed by path, followed when no event field has that path.
pub exists_false: RefCell<FxHashMap<String, Rc<Self>>>,
}
impl<X: Clone + Eq + std::hash::Hash> MutableFieldMatcher<X> {
/// Creates a state with no value transitions, no matches and no `exists` edges.
#[must_use]
pub fn new() -> Self {
    let transitions = RefCell::new(FxHashMap::default());
    let matches = RefCell::new(Vec::new());
    let exists_true = RefCell::new(FxHashMap::default());
    let exists_false = RefCell::new(FxHashMap::default());
    Self {
        transitions,
        matches,
        exists_true,
        exists_false,
    }
}
/// Records `x` as a match reported whenever this state is reached.
/// Duplicates are not filtered here.
pub fn add_match(&self, x: X) {
    let mut recorded = self.matches.borrow_mut();
    recorded.push(x);
}
/// Returns the successor state for an `exists` condition on `path`,
/// creating and registering it on first use.
///
/// `exists == true` uses the `exists_true` table, otherwise `exists_false`.
pub fn add_exists(&self, exists: bool, path: &str) -> Rc<Self> {
    let mut children = if exists {
        self.exists_true.borrow_mut()
    } else {
        self.exists_false.borrow_mut()
    };
    match children.get(path) {
        Some(child) => Rc::clone(child),
        None => {
            let child = Rc::new(Self::new());
            children.insert(path.to_string(), Rc::clone(&child));
            child
        }
    }
}
/// Adds value transitions for `path` and returns the successor states.
///
/// When every matcher is `Matcher::Exact`, all values are inserted in one
/// bulk call and share a single successor. Otherwise each matcher is added
/// individually, yielding one successor per matcher (in input order).
///
/// # Errors
/// Propagates the first error returned by the value matcher; matchers
/// processed before the failure stay added.
pub fn add_transition(
    &self,
    path: &str,
    matchers: &[crate::json::Matcher],
    budget: usize,
) -> Result<Vec<Rc<Self>>, crate::QuaminaError> {
    use crate::json::Matcher;
    let mut by_path = self.transitions.borrow_mut();
    let value_matcher = by_path
        .entry(path.to_string())
        .or_insert_with(|| Rc::new(MutableValueMatcher::new()));
    // `Some(..)` only when every matcher is an exact string.
    let exact_values: Option<Vec<&[u8]>> = matchers
        .iter()
        .map(|m| match m {
            Matcher::Exact(s) => Some(s.as_bytes()),
            _ => None,
        })
        .collect();
    if let Some(exact_values) = exact_values {
        let shared = value_matcher.add_string_transitions_bulk(&exact_values, budget)?;
        return Ok(vec![shared]);
    }
    matchers
        .iter()
        .map(|m| value_matcher.add_transition(m, budget))
        .collect()
}
/// Returns the successor states reached by `value` on field `path`,
/// or an empty vector when no value matcher is registered for `path`.
pub fn transition_on(
    &self,
    path: &str,
    value: &[u8],
    is_number: bool,
    bufs: &mut NfaBuffers,
) -> Vec<Rc<Self>> {
    let by_path = self.transitions.borrow();
    match by_path.get(path) {
        Some(value_matcher) => value_matcher.transition_on(value, is_number, bufs),
        None => Vec::new(),
    }
}
}
/// Matches the values of one field path and maps each accepted value to the
/// successor field-matcher state(s).
pub struct MutableValueMatcher<X: Clone + Eq + std::hash::Hash> {
/// The single exact value held while the matcher contains only one string
/// and no arena has been built yet.
pub(crate) singleton_match: RefCell<Option<Vec<u8>>>,
/// Successor state paired with `singleton_match`.
pub(crate) singleton_transition: RefCell<Option<Rc<MutableFieldMatcher<X>>>>,
/// Set once a numeric matcher is added; gates the q-number conversion of
/// incoming numeric values at match time.
pub(crate) has_numbers: Cell<bool>,
/// Maps the `FieldMatcher` pointers stored in the arenas to their mutable
/// successor states. The pointers are used as keys only.
pub(crate) transition_map: RefCell<FxHashMap<*const FieldMatcher, Rc<MutableFieldMatcher<X>>>>,
/// Lookaround patterns, evaluated separately from the main arena.
pub(crate) multi_condition_nfas: RefCell<Vec<MultiConditionNfa>>,
/// Scratch buffers reused across arena traversals.
pub(crate) arena_bufs: RefCell<ArenaNfaBuffers>,
/// Merged automaton for all forward-matched patterns and its start state.
pub(crate) main_arena: RefCell<Option<(StateArena, StateId)>>,
/// Selects NFA traversal (`true`) or DFA traversal (`false`) of `main_arena`.
pub(crate) main_arena_is_nfa: RefCell<bool>,
/// Automaton for suffix patterns; traversed backward over the value.
pub(crate) suffix_arena: RefCell<Option<(StateArena, StateId)>>,
}
// Hand-written `Default` that delegates to `new()`, so no `X: Default`
// bound is required.
impl<X: Clone + Eq + std::hash::Hash> Default for MutableValueMatcher<X> {
fn default() -> Self {
Self::new()
}
}
impl<X: Clone + Eq + std::hash::Hash> MutableValueMatcher<X> {
/// Creates an empty value matcher: no singleton, no arenas, no numeric
/// matchers and DFA traversal selected for the (not yet built) main arena.
#[must_use]
pub fn new() -> Self {
    let singleton_match = RefCell::new(None);
    let singleton_transition = RefCell::new(None);
    let main_arena = RefCell::new(None);
    let suffix_arena = RefCell::new(None);
    Self {
        singleton_match,
        singleton_transition,
        has_numbers: Cell::new(false),
        transition_map: RefCell::new(FxHashMap::default()),
        multi_condition_nfas: RefCell::new(Vec::new()),
        arena_bufs: RefCell::new(ArenaNfaBuffers::new()),
        main_arena,
        main_arena_is_nfa: RefCell::new(false),
        suffix_arena,
    }
}
/// Verifies that `size` does not exceed `budget`. A `budget` of `0` disables
/// the check.
///
/// # Errors
/// `QuaminaError::PatternTooComplex` when `budget != 0` and `size > budget`.
fn check_budget(size: usize, budget: usize) -> Result<(), crate::QuaminaError> {
    let within_budget = budget == 0 || size <= budget;
    if within_budget {
        return Ok(());
    }
    Err(crate::QuaminaError::PatternTooComplex(format!(
        "automaton byte size ({size} bytes) exceeds budget ({budget} bytes)"
    )))
}
/// Checks the estimated byte size of the main arena against `budget`.
/// Succeeds trivially when no main arena exists.
fn check_main_arena_budget(&self, budget: usize) -> Result<(), crate::QuaminaError> {
    let guard = self.main_arena.borrow();
    match guard.as_ref() {
        Some((arena, _)) => Self::check_budget(arena.estimated_byte_size(), budget),
        None => Ok(()),
    }
}
/// Merges `new_arena` into the main arena, or installs it as the main arena
/// when none exists yet.
///
/// # Errors
/// `PatternTooComplex` when `new_arena` alone, or the merged result, exceeds
/// `budget`. If the merged result is too large the previous main arena is
/// put back unchanged.
fn merge_into_main_arena(
    &self,
    new_arena: StateArena,
    new_start: StateId,
    budget: usize,
) -> Result<(), crate::QuaminaError> {
    Self::check_budget(new_arena.estimated_byte_size(), budget)?;
    let mut slot = self.main_arena.borrow_mut();
    let Some((current, current_start)) = slot.take() else {
        *slot = Some((new_arena, new_start));
        return Ok(());
    };
    let (merged, merged_start) = merge_arena_nfas(&current, current_start, &new_arena, new_start);
    match Self::check_budget(merged.estimated_byte_size(), budget) {
        Ok(()) => {
            *slot = Some((merged, merged_start));
            Ok(())
        }
        Err(too_big) => {
            // Roll back: keep the automaton we had before the attempt.
            *slot = Some((current, current_start));
            Err(too_big)
        }
    }
}
/// Removes the singleton value (if any) and returns it as a string automaton.
///
/// A fresh `FieldMatcher` handle is registered in `transition_map` so that the
/// automaton's accepting transition still leads to the singleton's successor.
/// Returns `None` when no singleton is stored.
///
/// # Panics
/// If a singleton value is stored without its paired transition.
fn take_singleton_as_arena(&self) -> Option<(StateArena, StateId)> {
    let value = self.singleton_match.borrow_mut().take()?;
    let successor = self.singleton_transition.borrow_mut().take().unwrap();
    let handle = Arc::new(FieldMatcher::new());
    self.transition_map
        .borrow_mut()
        .insert(Arc::as_ptr(&handle), successor);
    Some(make_string_arena_fa(&value, handle))
}
/// Merges `new_arena` into the main arena, first folding in the singleton
/// value (if one is stored) so that it keeps matching.
///
/// # Errors
/// Propagates budget errors from `merge_into_main_arena`.
fn merge_with_singleton(
    &self,
    new_arena: StateArena,
    new_start: StateId,
    budget: usize,
) -> Result<(), crate::QuaminaError> {
    let (arena, start) = match self.take_singleton_as_arena() {
        Some((single_arena, single_start)) => {
            merge_arena_nfas(&single_arena, single_start, &new_arena, new_start)
        }
        None => (new_arena, new_start),
    };
    self.merge_into_main_arena(arena, start, budget)
}
/// Guarantees that a main arena exists and that any stored singleton has
/// been folded into it.
///
/// * main arena present: the singleton (if any) is merged into it;
/// * main arena absent, singleton present: the singleton automaton becomes
///   the main arena after a budget check;
/// * neither present: an arena with a single freshly allocated start state
///   is installed.
fn ensure_main_arena_with_singleton(&self, budget: usize) -> Result<(), crate::QuaminaError> {
    let has_main = self.main_arena.borrow().is_some();
    let singleton = self.take_singleton_as_arena();
    match (has_main, singleton) {
        (true, Some((arena, start))) => self.merge_into_main_arena(arena, start, budget),
        (true, None) => Ok(()),
        (false, Some((arena, start))) => {
            Self::check_budget(arena.estimated_byte_size(), budget)?;
            *self.main_arena.borrow_mut() = Some((arena, start));
            Ok(())
        }
        (false, None) => {
            let mut arena = StateArena::new();
            let start = arena.alloc();
            arena.precompute_epsilon_closures();
            *self.main_arena.borrow_mut() = Some((arena, start));
            Ok(())
        }
    }
}
pub fn add_transition(
&self,
matcher: &crate::json::Matcher,
budget: usize,
) -> Result<Rc<MutableFieldMatcher<X>>, crate::QuaminaError> {
use crate::json::Matcher;
match matcher {
Matcher::Exact(s) => self.add_string_transition(s.as_bytes(), budget),
Matcher::NumericExact(n) => self.add_numeric_transition(*n, budget),
Matcher::Prefix(s) => {
let mut quoted = Vec::with_capacity(s.len() + 1);
quoted.push(b'"');
quoted.extend_from_slice(s.as_bytes());
self.add_prefix_transition("ed, budget)
}
Matcher::Shellstyle(s) => {
self.add_shellstyle_transition("e_wrap(s.as_bytes()), budget)
}
Matcher::Wildcard(s) => self.add_wildcard_transition("e_wrap(s.as_bytes()), budget),
Matcher::AnythingBut(excluded) => {
let excluded_bytes: Vec<Vec<u8>> =
excluded.iter().map(|s| quote_wrap(s.as_bytes())).collect();
self.add_anything_but_transition(&excluded_bytes, budget)
}
Matcher::AnythingButNumeric(excluded) => {
self.has_numbers.set(true);
self.add_anything_but_numeric_transition(excluded, budget)
}
Matcher::EqualsIgnoreCase(s) => {
self.add_monocase_transition("e_wrap(s.as_bytes()), budget)
}
Matcher::ParsedRegexp(tree) => self.add_regexp_transition(tree, budget),
Matcher::MultiCondition(mc) => self.add_multi_condition_transition(mc, budget),
Matcher::Suffix(s) => self.add_suffix_transition(s, budget),
Matcher::Numeric(cmp) => {
self.has_numbers.set(true);
self.add_numeric_range_transition(cmp, budget)
}
Matcher::Cidr(cidr) => self.add_cidr_transition(cidr, budget),
Matcher::Exists(_) => Ok(Rc::new(MutableFieldMatcher::new())),
}
}
/// Adds several exact string values that all lead to one shared successor.
///
/// An empty slice yields a fresh, unlinked state; a single value is
/// delegated to `add_string_transition`.
///
/// # Errors
/// Budget errors from building or growing the main arena.
fn add_string_transitions_bulk(
    &self,
    values: &[&[u8]],
    budget: usize,
) -> Result<Rc<MutableFieldMatcher<X>>, crate::QuaminaError> {
    match values {
        [] => return Ok(Rc::new(MutableFieldMatcher::new())),
        [only] => return self.add_string_transition(only, budget),
        _ => {}
    }
    let successor = Rc::new(MutableFieldMatcher::new());
    let handle = Arc::new(FieldMatcher::new());
    self.transition_map
        .borrow_mut()
        .insert(Arc::as_ptr(&handle), Rc::clone(&successor));
    self.ensure_main_arena_with_singleton(budget)?;
    {
        // Scoped so the mutable borrow ends before the budget check re-borrows.
        let mut guard = self.main_arena.borrow_mut();
        let (arena, start) = guard.as_mut().unwrap();
        for bytes in values {
            insert_string(arena, *start, bytes, Arc::clone(&handle));
        }
    }
    self.check_main_arena_budget(budget)?;
    Ok(successor)
}
/// Adds one exact string value and returns its successor state.
///
/// * Completely empty matcher: the value is stored as the singleton, no
///   automaton is built.
/// * Value equal to the stored singleton: the existing successor is reused.
/// * Otherwise the singleton (if any) is folded into the main arena and the
///   value is inserted there with a new successor.
///
/// # Errors
/// Budget errors from building or growing the main arena.
fn add_string_transition(
&self,
val: &[u8],
budget: usize,
) -> Result<Rc<MutableFieldMatcher<X>>, crate::QuaminaError> {
// Both guards are shared borrows; they must be dropped before any
// `borrow_mut()` on the same cells below.
let singleton = self.singleton_match.borrow();
let singleton_trans = self.singleton_transition.borrow();
// "Virgin": nothing stored yet in the singleton slot, main arena or suffix arena.
let is_virgin = singleton.is_none()
&& self.main_arena.borrow().is_none()
&& self.suffix_arena.borrow().is_none();
if is_virgin {
drop(singleton);
drop(singleton_trans);
let next_fm = Rc::new(MutableFieldMatcher::new());
*self.singleton_match.borrow_mut() = Some(val.to_vec());
*self.singleton_transition.borrow_mut() = Some(next_fm.clone());
return Ok(next_fm);
}
// Same value as the stored singleton: hand back the existing successor.
if let Some(ref existing) = *singleton
&& existing == val
{
return Ok(singleton_trans.as_ref().unwrap().clone());
}
// Release the shared borrows: `ensure_main_arena_with_singleton` below
// mutably borrows the singleton cells.
drop(singleton);
drop(singleton_trans);
let next_fm = Rc::new(MutableFieldMatcher::new());
let next_arc = Arc::new(FieldMatcher::new());
// The raw pointer is only a lookup key linking the arena to `next_fm`.
self.transition_map
.borrow_mut()
.insert(Arc::as_ptr(&next_arc), next_fm.clone());
self.ensure_main_arena_with_singleton(budget)?;
{
let mut main = self.main_arena.borrow_mut();
let (arena, start) = main.as_mut().unwrap();
insert_string(arena, *start, val, next_arc);
}
// Checked after insertion; the inserted value is not rolled back on failure.
self.check_main_arena_budget(budget)?;
Ok(next_fm)
}
/// Adds an exact numeric value. Both the decimal text produced by
/// `f64::to_string` and the q-number encoding are inserted, sharing one
/// successor state. Sets `has_numbers`.
///
/// # Errors
/// Budget errors from building or growing the main arena.
fn add_numeric_transition(
    &self,
    num: f64,
    budget: usize,
) -> Result<Rc<MutableFieldMatcher<X>>, crate::QuaminaError> {
    self.has_numbers.set(true);
    let text = num.to_string();
    let q_bytes = crate::numbits::q_num_from_f64(num);
    let successor = Rc::new(MutableFieldMatcher::new());
    let handle = Arc::new(FieldMatcher::new());
    self.transition_map
        .borrow_mut()
        .insert(Arc::as_ptr(&handle), Rc::clone(&successor));
    self.ensure_main_arena_with_singleton(budget)?;
    {
        let mut guard = self.main_arena.borrow_mut();
        let (arena, start) = guard.as_mut().unwrap();
        insert_string(arena, *start, text.as_bytes(), Arc::clone(&handle));
        insert_string(arena, *start, &q_bytes, handle);
    }
    self.check_main_arena_budget(budget)?;
    Ok(successor)
}
/// Adds a prefix matcher for `prefix` (already carrying its opening quote)
/// and returns the successor state.
///
/// # Errors
/// Budget errors raised while merging into the main arena.
fn add_prefix_transition(
    &self,
    prefix: &[u8],
    budget: usize,
) -> Result<Rc<MutableFieldMatcher<X>>, crate::QuaminaError> {
    let successor = Rc::new(MutableFieldMatcher::new());
    let handle = Arc::new(FieldMatcher::new());
    self.transition_map
        .borrow_mut()
        .insert(Arc::as_ptr(&handle), Rc::clone(&successor));
    let (arena, start) = make_prefix_arena_fa(prefix, handle);
    self.merge_with_singleton(arena, start, budget)
        .map(|()| successor)
}
/// Adds a shell-style pattern and switches the main arena to NFA traversal.
///
/// # Errors
/// Budget errors raised while merging into the main arena.
fn add_shellstyle_transition(
    &self,
    pattern: &[u8],
    budget: usize,
) -> Result<Rc<MutableFieldMatcher<X>>, crate::QuaminaError> {
    let successor = Rc::new(MutableFieldMatcher::new());
    let handle = Arc::new(FieldMatcher::new());
    self.transition_map
        .borrow_mut()
        .insert(Arc::as_ptr(&handle), Rc::clone(&successor));
    // The NFA flag is set before merging and stays set even if the merge fails.
    *self.main_arena_is_nfa.borrow_mut() = true;
    let (arena, start) = make_shellstyle_arena_fa(pattern, handle);
    self.merge_with_singleton(arena, start, budget)
        .map(|()| successor)
}
/// Adds a suffix matcher. The suffix is stored reversed, preceded by a
/// double quote, in the dedicated suffix arena.
///
/// Any stored singleton is first moved into the main arena.
///
/// # Errors
/// Budget errors raised while moving the singleton into the main arena.
// NOTE(review): the suffix arena itself is not checked against `budget` here.
fn add_suffix_transition(
    &self,
    suffix: &str,
    budget: usize,
) -> Result<Rc<MutableFieldMatcher<X>>, crate::QuaminaError> {
    // `take_singleton_as_arena` yields `None` when there is no singleton.
    if let Some((arena, start)) = self.take_singleton_as_arena() {
        self.merge_into_main_arena(arena, start, budget)?;
    }
    let successor = Rc::new(MutableFieldMatcher::new());
    let handle = Arc::new(FieldMatcher::new());
    self.transition_map
        .borrow_mut()
        .insert(Arc::as_ptr(&handle), Rc::clone(&successor));
    // Quote first, then the suffix bytes back-to-front.
    let reversed: Vec<u8> = std::iter::once(b'"')
        .chain(suffix.bytes().rev())
        .collect();
    let mut slot = self.suffix_arena.borrow_mut();
    match slot.as_mut() {
        Some((arena, start)) => {
            insert_suffix(arena, *start, &reversed, handle);
        }
        None => {
            *slot = Some(make_suffix_dfa(&reversed, handle));
        }
    }
    Ok(successor)
}
/// Adds a wildcard pattern and switches the main arena to NFA traversal.
///
/// # Errors
/// Budget errors raised while merging into the main arena.
fn add_wildcard_transition(
    &self,
    pattern: &[u8],
    budget: usize,
) -> Result<Rc<MutableFieldMatcher<X>>, crate::QuaminaError> {
    let successor = Rc::new(MutableFieldMatcher::new());
    let handle = Arc::new(FieldMatcher::new());
    self.transition_map
        .borrow_mut()
        .insert(Arc::as_ptr(&handle), Rc::clone(&successor));
    *self.main_arena_is_nfa.borrow_mut() = true;
    let (arena, start) = make_wildcard_arena_fa(pattern, handle);
    self.merge_with_singleton(arena, start, budget)
        .map(|()| successor)
}
/// Adds an "anything but" matcher over the given excluded byte strings.
///
/// # Errors
/// Budget errors raised while merging into the main arena.
fn add_anything_but_transition(
    &self,
    excluded: &[Vec<u8>],
    budget: usize,
) -> Result<Rc<MutableFieldMatcher<X>>, crate::QuaminaError> {
    let successor = Rc::new(MutableFieldMatcher::new());
    let handle = Arc::new(FieldMatcher::new());
    self.transition_map
        .borrow_mut()
        .insert(Arc::as_ptr(&handle), Rc::clone(&successor));
    let (arena, start) = make_anything_but_arena_fa(excluded, handle);
    self.merge_with_singleton(arena, start, budget)
        .map(|()| successor)
}
/// Adds an "anything but" matcher over numbers. Each excluded number is
/// converted to its q-number encoding before the automaton is built.
///
/// # Errors
/// Budget errors raised while merging into the main arena.
fn add_anything_but_numeric_transition(
    &self,
    excluded: &[f64],
    budget: usize,
) -> Result<Rc<MutableFieldMatcher<X>>, crate::QuaminaError> {
    let successor = Rc::new(MutableFieldMatcher::new());
    let handle = Arc::new(FieldMatcher::new());
    self.transition_map
        .borrow_mut()
        .insert(Arc::as_ptr(&handle), Rc::clone(&successor));
    let mut encoded: Vec<Vec<u8>> = Vec::with_capacity(excluded.len());
    for &number in excluded {
        encoded.push(crate::numbits::q_num_from_f64(number));
    }
    let (arena, start) = make_anything_but_arena_fa(&encoded, handle);
    self.merge_with_singleton(arena, start, budget)
        .map(|()| successor)
}
/// Adds a case-insensitive matcher for `val`.
///
/// # Errors
/// Budget errors raised while merging into the main arena.
fn add_monocase_transition(
    &self,
    val: &[u8],
    budget: usize,
) -> Result<Rc<MutableFieldMatcher<X>>, crate::QuaminaError> {
    let successor = Rc::new(MutableFieldMatcher::new());
    let handle = Arc::new(FieldMatcher::new());
    self.transition_map
        .borrow_mut()
        .insert(Arc::as_ptr(&handle), Rc::clone(&successor));
    let (arena, start) = make_monocase_arena_fa(val, handle);
    self.merge_with_singleton(arena, start, budget)
        .map(|()| successor)
}
/// Compiles a parsed regular expression and merges it into the main arena.
/// NFA traversal is enabled only if the compiled arena reports itself as
/// nondeterministic.
///
/// # Errors
/// Budget errors raised while merging into the main arena.
fn add_regexp_transition(
    &self,
    tree: &crate::regexp::RegexpRoot,
    budget: usize,
) -> Result<Rc<MutableFieldMatcher<X>>, crate::QuaminaError> {
    let successor = Rc::new(MutableFieldMatcher::new());
    let (arena, start, handle) = make_regexp_nfa_arena(tree.clone());
    let needs_nfa = arena.is_nondeterministic();
    if needs_nfa {
        *self.main_arena_is_nfa.borrow_mut() = true;
    }
    self.transition_map
        .borrow_mut()
        .insert(Arc::as_ptr(&handle), Rc::clone(&successor));
    self.merge_with_singleton(arena, start, budget)
        .map(|()| successor)
}
fn add_multi_condition_transition(
&self,
mc: &crate::json::MultiConditionPattern,
budget: usize,
) -> Result<Rc<MutableFieldMatcher<X>>, crate::QuaminaError> {
use crate::json::LookaroundCondition;
let next_fm = Rc::new(MutableFieldMatcher::new());
let (primary_arena, primary_start, field_matcher_arc) =
make_regexp_nfa_arena(mc.primary.clone());
self.transition_map
.borrow_mut()
.insert(Arc::as_ptr(&field_matcher_arc), next_fm.clone());
let mut condition_nfas = Vec::new();
for condition in &mc.conditions {
let (combined_pattern, is_negative) = match condition {
LookaroundCondition::PositiveLookahead(pattern) => {
(pattern.clone(), false)
}
LookaroundCondition::NegativeLookahead(pattern) => {
(pattern.clone(), true)
}
LookaroundCondition::PositiveLookbehind { pattern, .. } => {
let combined = build_lookbehind_combined_pattern(pattern, &mc.primary);
(combined, false)
}
LookaroundCondition::NegativeLookbehind { pattern, .. } => {
let combined = build_lookbehind_combined_pattern(pattern, &mc.primary);
(combined, true)
}
};
let (arena, start, _) = make_regexp_nfa_arena(combined_pattern);
condition_nfas.push(ConditionNfa {
arena,
start,
is_negative,
});
}
Self::check_budget(primary_arena.estimated_byte_size(), budget)?;
for cond in &condition_nfas {
Self::check_budget(cond.arena.estimated_byte_size(), budget)?;
}
self.multi_condition_nfas
.borrow_mut()
.push(MultiConditionNfa {
primary_arena,
primary_start,
field_matcher_ptr: Arc::as_ptr(&field_matcher_arc),
conditions: condition_nfas,
});
Ok(next_fm)
}
/// Adds a numeric comparison matcher (range, greater-than or less-than,
/// depending on which bounds are present).
///
/// With no bounds at all nothing is compiled and the fresh successor is
/// returned as is.
///
/// # Errors
/// Budget errors raised while merging into the main arena.
fn add_numeric_range_transition(
    &self,
    cmp: &crate::json::NumericComparison,
    budget: usize,
) -> Result<Rc<MutableFieldMatcher<X>>, crate::QuaminaError> {
    let successor = Rc::new(MutableFieldMatcher::new());
    let handle = Arc::new(FieldMatcher::new());
    self.transition_map
        .borrow_mut()
        .insert(Arc::as_ptr(&handle), Rc::clone(&successor));
    let (arena, start) = match (&cmp.lower, &cmp.upper) {
        (None, None) => return Ok(successor),
        (Some((incl, val)), None) => make_numeric_greater_arena_fa(*val, *incl, handle),
        (None, Some((incl, val))) => make_numeric_less_arena_fa(*val, *incl, handle),
        (Some((lower_incl, lower_val)), Some((upper_incl, upper_val))) => {
            make_numeric_range_arena_fa(*lower_val, *lower_incl, *upper_val, *upper_incl, handle)
        }
    };
    self.merge_with_singleton(arena, start, budget)
        .map(|()| successor)
}
/// Adds a CIDR matcher and switches the main arena to NFA traversal.
///
/// # Errors
/// Budget errors raised while merging into the main arena.
fn add_cidr_transition(
    &self,
    cidr: &crate::json::CidrPattern,
    budget: usize,
) -> Result<Rc<MutableFieldMatcher<X>>, crate::QuaminaError> {
    let successor = Rc::new(MutableFieldMatcher::new());
    let handle = Arc::new(FieldMatcher::new());
    self.transition_map
        .borrow_mut()
        .insert(Arc::as_ptr(&handle), Rc::clone(&successor));
    *self.main_arena_is_nfa.borrow_mut() = true;
    let (arena, start) = make_cidr_arena_fa(cidr, handle);
    self.merge_with_singleton(arena, start, budget)
        .map(|()| successor)
}
/// Returns every successor state reached by `value`.
///
/// `is_number` tells whether the event value is a JSON number; it only has
/// an effect once a numeric matcher has been added (`has_numbers`).
/// `_bufs` is unused here; traversal uses the matcher's own `arena_bufs`.
pub fn transition_on(
&self,
value: &[u8],
is_number: bool,
_bufs: &mut NfaBuffers,
) -> Vec<Rc<MutableFieldMatcher<X>>> {
// Fast path: only a singleton is stored and there are no lookaround
// patterns, so a plain byte comparison decides the outcome.
if self.multi_condition_nfas.borrow().is_empty()
&& let Some(ref singleton_val) = *self.singleton_match.borrow()
{
if singleton_val == value
&& let Some(ref trans) = *self.singleton_transition.borrow()
{
return vec![trans.clone()];
}
return vec![];
}
let transition_map = self.transition_map.borrow();
let mut result = Vec::new();
// Reached with a singleton only when lookaround patterns also exist.
let has_singleton = if let Some(ref singleton_val) = *self.singleton_match.borrow() {
if singleton_val == value
&& let Some(ref trans) = *self.singleton_transition.borrow()
{
result.push(trans.clone());
}
true
} else {
false
};
// Numbers are matched in q-number form when numeric matchers are present;
// otherwise the raw bytes are used.
let q_num_storage = self.maybe_q_number(value, is_number);
let value_to_match: &[u8] = match &q_num_storage {
Some(q) => q.as_slice(),
None => value,
};
// The arenas are skipped while a singleton is still stored.
if !has_singleton {
self.collect_arena_transitions(value_to_match, &transition_map, &mut result);
}
self.collect_multi_condition_transitions(value_to_match, &transition_map, &mut result);
result
}
/// Converts `value` to its q-number form when numeric matching applies.
///
/// Returns `None` unless `is_number` is set, numeric matchers exist, and
/// `value` is valid UTF-8 that parses as an `f64`.
fn maybe_q_number(
    &self,
    value: &[u8],
    is_number: bool,
) -> Option<crate::numbits::QNumberStack> {
    if !is_number || !self.has_numbers.get() {
        return None;
    }
    std::str::from_utf8(value)
        .ok()
        .and_then(|text| text.parse::<f64>().ok())
        .map(|number| crate::numbits::q_num_stack(number))
}
/// Traverses the main arena (forward) and the suffix arena (backward) with
/// `value_to_match` and appends the successor states of every transition
/// found in `transition_map` to `result`.
fn collect_arena_transitions(
    &self,
    value_to_match: &[u8],
    transition_map: &FxHashMap<*const FieldMatcher, Rc<MutableFieldMatcher<X>>>,
    result: &mut Vec<Rc<MutableFieldMatcher<X>>>,
) {
    {
        let main_guard = self.main_arena.borrow();
        if let Some((arena, start)) = main_guard.as_ref() {
            let mut scratch = self.arena_bufs.borrow_mut();
            let use_nfa = *self.main_arena_is_nfa.borrow();
            if use_nfa {
                traverse_arena_nfa(arena, *start, value_to_match, &mut scratch);
            } else {
                scratch.transitions.clear();
                traverse_arena_dfa(arena, *start, value_to_match, &mut scratch.transitions);
            }
            result.extend(scratch.transitions.iter().filter_map(|&ptr| {
                transition_map.get(&(ptr as *const FieldMatcher)).cloned()
            }));
        }
    }
    let suffix_guard = self.suffix_arena.borrow();
    if let Some((arena, start)) = suffix_guard.as_ref() {
        let mut scratch = self.arena_bufs.borrow_mut();
        scratch.transitions.clear();
        traverse_arena_dfa_backward(arena, *start, value_to_match, &mut scratch.transitions);
        result.extend(scratch.transitions.iter().filter_map(|&ptr| {
            transition_map.get(&(ptr as *const FieldMatcher)).cloned()
        }));
    }
}
fn collect_multi_condition_transitions(
&self,
value_to_match: &[u8],
transition_map: &FxHashMap<*const FieldMatcher, Rc<MutableFieldMatcher<X>>>,
result: &mut Vec<Rc<MutableFieldMatcher<X>>>,
) {
let multi_condition_nfas = self.multi_condition_nfas.borrow();
if multi_condition_nfas.is_empty() {
return;
}
let mut condition_bufs = self.arena_bufs.borrow_mut();
for mc_nfa in multi_condition_nfas.iter() {
let mut all_conditions_pass = true;
for condition in &mc_nfa.conditions {
traverse_arena_nfa(
&condition.arena,
condition.start,
value_to_match,
&mut condition_bufs,
);
let condition_matched = !condition_bufs.transitions.is_empty();
let condition_passes = if condition.is_negative {
!condition_matched
} else {
condition_matched
};
if !condition_passes {
all_conditions_pass = false;
break;
}
}
if all_conditions_pass {
let ptr = mc_nfa.field_matcher_ptr;
if let Some(mutable_fm) = transition_map.get(&ptr) {
if !result.iter().any(|r| Rc::ptr_eq(r, mutable_fm)) {
result.push(mutable_fm.clone());
}
}
}
}
}
}
/// An owned, flattened event field as consumed by `matches_for_fields`.
#[derive(Clone, Debug)]
pub struct EventField {
/// Field path; compared against pattern paths.
pub path: String,
/// Field value; matched as raw bytes via `as_bytes()`.
pub value: String,
/// Array positions the field was found under; used to reject combinations
/// of fields that sit in different elements of the same array.
pub array_trail: Vec<crate::json::ArrayPos>,
/// Whether the value is a number (enables numeric matching).
pub is_number: bool,
}
/// Builds an owned `EventField` by cloning the relevant parts of a parsed
/// `crate::json::Field`.
impl From<&crate::json::Field> for EventField {
    fn from(f: &crate::json::Field) -> Self {
        let path = f.path.clone();
        let value = f.value.clone();
        let array_trail = f.array_trail.clone();
        Self {
            path,
            value,
            array_trail,
            is_number: f.is_number,
        }
    }
}
/// Borrowed counterpart of `EventField`, consumed by `matches_for_fields_ref`.
#[derive(Clone, Debug)]
pub struct EventFieldRef<'a> {
/// Field path; compared against pattern paths.
pub path: &'a str,
/// Raw value bytes handed to the value matcher.
pub value: &'a [u8],
/// Array positions the field was found under.
pub array_trail: &'a [crate::flatten_json::ArrayPos],
/// Whether the value is a number (enables numeric matching).
pub is_number: bool,
}
/// Returns `true` unless some entry of `from` and some entry of `to` refer to
/// the same array (`array` equal) at different positions (`pos` differs).
fn no_array_trail_conflict_ref(
    from: &[crate::flatten_json::ArrayPos],
    to: &[crate::flatten_json::ArrayPos],
) -> bool {
    let conflicting = from.iter().any(|lhs| {
        to.iter()
            .any(|rhs| lhs.array == rhs.array && lhs.pos != rhs.pos)
    });
    !conflicting
}
/// Returns `true` unless some entry of `from` and some entry of `to` refer to
/// the same array (`array` equal) at different positions (`pos` differs).
fn no_array_trail_conflict(from: &[crate::json::ArrayPos], to: &[crate::json::ArrayPos]) -> bool {
    let conflicting = from.iter().any(|lhs| {
        to.iter()
            .any(|rhs| lhs.array == rhs.array && lhs.pos != rhs.pos)
    });
    !conflicting
}
/// Collects matches without duplicates while preserving first-seen order.
struct MatchSet<X: Clone + Eq + std::hash::Hash> {
/// Values already recorded; used for the duplicate check.
seen: FxHashSet<X>,
/// Recorded values in insertion order.
matches: Vec<X>,
}
impl<X: Clone + Eq + std::hash::Hash> MatchSet<X> {
    /// Creates an empty set.
    fn new() -> Self {
        let seen = FxHashSet::default();
        let matches = Vec::new();
        Self { seen, matches }
    }

    /// Records `x` unless it was recorded before; `x` is cloned only when new.
    fn add(&mut self, x: X) {
        if self.seen.contains(&x) {
            return;
        }
        self.seen.insert(x.clone());
        self.matches.push(x);
    }

    /// Consumes the set and returns the recorded values in insertion order.
    fn into_vec(self) -> Vec<X> {
        let Self { matches, .. } = self;
        matches
    }
}
/// Pattern matcher built on a graph of `MutableFieldMatcher` states.
// NOTE(review): the derived `Default` yields `arena_byte_budget == 0`, which
// `check_budget` treats as "no limit", whereas `new()` takes the budget from
// `PatternLimits::default()`. It also requires `X: Default`. Confirm whether
// `default()` is meant to differ from `new()`.
#[derive(Default)]
pub struct CoreMatcher<X: Clone + Eq + std::hash::Hash> {
/// Start state; every match attempt begins here.
root: Rc<MutableFieldMatcher<X>>,
/// Byte budget passed to value matchers when adding patterns (`0` = unlimited).
arena_byte_budget: usize,
}
impl<X: Clone + Eq + std::hash::Hash> CoreMatcher<X> {
/// Creates an empty matcher whose arena byte budget comes from
/// `crate::PatternLimits::default()`.
#[must_use]
pub fn new() -> Self {
    let root = Rc::new(MutableFieldMatcher::new());
    let arena_byte_budget = crate::PatternLimits::default().arena_byte_budget;
    Self {
        root,
        arena_byte_budget,
    }
}
pub fn add_pattern(
&self,
x: X,
pattern_fields: &[(String, Vec<crate::json::Matcher>)],
) -> Result<(), crate::QuaminaError> {
let mut sorted_fields: Vec<_> = pattern_fields.to_vec();
sorted_fields.sort_by(|a, b| a.0.cmp(&b.0));
let mut states: Vec<Rc<MutableFieldMatcher<X>>> = vec![self.root.clone()];
for (path, matchers) in &sorted_fields {
if matchers.is_empty() {
continue;
}
let mut next_states = Vec::new();
for state in &states {
let first_matcher = &matchers[0];
match first_matcher {
crate::json::Matcher::Exists(true) => {
let next = state.add_exists(true, path);
next_states.push(next);
}
crate::json::Matcher::Exists(false) => {
let next = state.add_exists(false, path);
next_states.push(next);
}
_ => {
let nexts = state.add_transition(path, matchers, self.arena_byte_budget)?;
next_states.extend(nexts);
}
}
}
states = next_states;
}
for state in states {
state.add_match(x.clone());
}
Ok(())
}
/// Returns the identifiers of all patterns matched by `fields`, without
/// duplicates, in first-match order.
///
/// With no fields, only the matches stored directly on the root's
/// `exists_false` successors are returned.
#[must_use]
pub fn matches_for_fields(&self, fields: &[EventField]) -> Vec<X> {
    if fields.is_empty() {
        return Self::collect_exists_false_matches(&self.root);
    }
    let mut found = MatchSet::new();
    let mut scratch = NfaBuffers::new();
    // Every field is tried as a possible first step from the root.
    for start in 0..fields.len() {
        self.try_to_match(fields, start, &self.root, &mut found, &mut scratch);
    }
    found.into_vec()
}
/// Tries to advance from `state` using `fields[index]`, recording every
/// match reached and recursing into later fields.
///
/// Three kinds of edges are followed:
/// * `exists:true` on the field's path,
/// * `exists:false` edges of `state` whose path is absent from the event,
/// * value transitions on the field's path.
///
/// After each successful step, later fields are tried only if their array
/// trails do not conflict with the current field's, and the `exists:false`
/// edges of the state just entered are checked too.
fn try_to_match(
    &self,
    fields: &[EventField],
    index: usize,
    state: &Rc<MutableFieldMatcher<X>>,
    matches: &mut MatchSet<X>,
    bufs: &mut NfaBuffers,
) {
    let field = &fields[index];
    if let Some(exists_trans) = state.exists_true.borrow().get(&field.path) {
        for m in exists_trans.matches.borrow().iter() {
            matches.add(m.clone());
        }
        for next_idx in (index + 1)..fields.len() {
            if no_array_trail_conflict(&field.array_trail, &fields[next_idx].array_trail) {
                self.try_to_match(fields, next_idx, exists_trans, matches, bufs);
            }
        }
        // Check the state just entered (not `state`, which is handled right
        // below). Otherwise `exists:false` conditions that follow an
        // `exists:true` step are missed when no later field recurses into it.
        self.check_exists_false(exists_trans, fields, index, matches, bufs);
    }
    self.check_exists_false(state, fields, index, matches, bufs);
    let next_states =
        state.transition_on(&field.path, field.value.as_bytes(), field.is_number, bufs);
    for next_state in next_states {
        for m in next_state.matches.borrow().iter() {
            matches.add(m.clone());
        }
        for next_idx in (index + 1)..fields.len() {
            if no_array_trail_conflict(&field.array_trail, &fields[next_idx].array_trail) {
                self.try_to_match(fields, next_idx, &next_state, matches, bufs);
            }
        }
        self.check_exists_false(&next_state, fields, index, matches, bufs);
    }
}
/// Follows every `exists:false` edge of `state` whose path does not occur in
/// `fields`, recording the successor's matches and continuing the match
/// from the successor at the same field index.
// NOTE(review): the binary search assumes `fields` is sorted by path —
// confirm against the callers that build the field list.
fn check_exists_false(
    &self,
    state: &Rc<MutableFieldMatcher<X>>,
    fields: &[EventField],
    index: usize,
    matches: &mut MatchSet<X>,
    bufs: &mut NfaBuffers,
) {
    for (path, target) in state.exists_false.borrow().iter() {
        let present = fields
            .binary_search_by(|f| f.path.as_str().cmp(path.as_str()))
            .is_ok();
        if present {
            continue;
        }
        target
            .matches
            .borrow()
            .iter()
            .for_each(|m| matches.add(m.clone()));
        self.try_to_match(fields, index, target, matches, bufs);
    }
}
/// Gathers the matches stored on the direct `exists:false` successors of
/// `state`. Only one level is visited and duplicates are not removed.
fn collect_exists_false_matches(state: &Rc<MutableFieldMatcher<X>>) -> Vec<X> {
    let children = state.exists_false.borrow();
    let mut collected = Vec::new();
    for child in children.values() {
        let child_matches = child.matches.borrow();
        collected.extend_from_slice(&child_matches);
    }
    collected
}
/// Borrowed-field variant of `matches_for_fields`; `bufs` is cleared and
/// reused instead of being allocated per call.
///
/// With no fields, only the matches stored directly on the root's
/// `exists_false` successors are returned (and `bufs` is left untouched).
pub fn matches_for_fields_ref(
    &self,
    fields: &[EventFieldRef<'_>],
    bufs: &mut NfaBuffers,
) -> Vec<X> {
    if fields.is_empty() {
        return Self::collect_exists_false_matches(&self.root);
    }
    bufs.clear();
    let mut found = MatchSet::new();
    for start in 0..fields.len() {
        self.try_to_match_ref(fields, start, &self.root, &mut found, bufs);
    }
    found.into_vec()
}
/// Borrowed-field variant of `try_to_match`: tries to advance from `state`
/// using `fields[index]`, recording matches and recursing into later
/// fields whose array trails do not conflict.
///
/// `exists:true`, absent-path `exists:false`, and value transitions are all
/// followed; after each successful step the `exists:false` edges of the
/// state just entered are checked too.
fn try_to_match_ref(
    &self,
    fields: &[EventFieldRef<'_>],
    index: usize,
    state: &Rc<MutableFieldMatcher<X>>,
    matches: &mut MatchSet<X>,
    bufs: &mut NfaBuffers,
) {
    let field = &fields[index];
    if let Some(exists_trans) = state.exists_true.borrow().get(field.path) {
        for m in exists_trans.matches.borrow().iter() {
            matches.add(m.clone());
        }
        for next_idx in (index + 1)..fields.len() {
            if no_array_trail_conflict_ref(field.array_trail, fields[next_idx].array_trail) {
                self.try_to_match_ref(fields, next_idx, exists_trans, matches, bufs);
            }
        }
        // Check the state just entered (not `state`, which is handled right
        // below). Otherwise `exists:false` conditions that follow an
        // `exists:true` step are missed when no later field recurses into it.
        self.check_exists_false_ref(exists_trans, fields, index, matches, bufs);
    }
    self.check_exists_false_ref(state, fields, index, matches, bufs);
    let next_states = state.transition_on(field.path, field.value, field.is_number, bufs);
    for next_state in next_states {
        for m in next_state.matches.borrow().iter() {
            matches.add(m.clone());
        }
        for next_idx in (index + 1)..fields.len() {
            if no_array_trail_conflict_ref(field.array_trail, fields[next_idx].array_trail) {
                self.try_to_match_ref(fields, next_idx, &next_state, matches, bufs);
            }
        }
        self.check_exists_false_ref(&next_state, fields, index, matches, bufs);
    }
}
/// Borrowed-field variant of `check_exists_false`: follows every
/// `exists:false` edge of `state` whose path does not occur in `fields`.
// NOTE(review): the binary search assumes `fields` is sorted by path —
// confirm against the callers that build the field list.
fn check_exists_false_ref(
    &self,
    state: &Rc<MutableFieldMatcher<X>>,
    fields: &[EventFieldRef<'_>],
    index: usize,
    matches: &mut MatchSet<X>,
    bufs: &mut NfaBuffers,
) {
    for (path, target) in state.exists_false.borrow().iter() {
        let present = fields
            .binary_search_by(|f| f.path.cmp(path.as_str()))
            .is_ok();
        if present {
            continue;
        }
        target
            .matches
            .borrow()
            .iter()
            .for_each(|m| matches.add(m.clone()));
        self.try_to_match_ref(fields, index, target, matches, bufs);
    }
}
/// Variant of `matches_for_fields` operating directly on
/// `crate::flatten_json::Field` values; `bufs` is cleared and reused.
///
/// With no fields, only the matches stored directly on the root's
/// `exists_false` successors are returned (and `bufs` is left untouched).
pub fn matches_for_fields_direct(
    &self,
    fields: &[crate::flatten_json::Field<'_>],
    bufs: &mut NfaBuffers,
) -> Vec<X> {
    if fields.is_empty() {
        return Self::collect_exists_false_matches(&self.root);
    }
    bufs.clear();
    let mut found = MatchSet::new();
    for start in 0..fields.len() {
        self.try_to_match_direct(fields, start, &self.root, &mut found, bufs);
    }
    found.into_vec()
}
/// Flattened-field variant of `try_to_match`: tries to advance from `state`
/// using `fields[index]`, recording matches and recursing into later
/// fields whose array trails do not conflict.
///
/// `exists:true`, absent-path `exists:false`, and value transitions are all
/// followed; after each successful step the `exists:false` edges of the
/// state just entered are checked too.
fn try_to_match_direct(
    &self,
    fields: &[crate::flatten_json::Field<'_>],
    index: usize,
    state: &Rc<MutableFieldMatcher<X>>,
    matches: &mut MatchSet<X>,
    bufs: &mut NfaBuffers,
) {
    let field = &fields[index];
    let path = field.path_str();
    let value = field.value_bytes();
    let array_trail = field.array_trail_slice();
    if let Some(exists_trans) = state.exists_true.borrow().get(path) {
        for m in exists_trans.matches.borrow().iter() {
            matches.add(m.clone());
        }
        for next_idx in (index + 1)..fields.len() {
            if no_array_trail_conflict_ref(array_trail, fields[next_idx].array_trail_slice()) {
                self.try_to_match_direct(fields, next_idx, exists_trans, matches, bufs);
            }
        }
        // Check the state just entered (not `state`, which is handled right
        // below). Otherwise `exists:false` conditions that follow an
        // `exists:true` step are missed when no later field recurses into it.
        self.check_exists_false_direct(exists_trans, fields, index, matches, bufs);
    }
    self.check_exists_false_direct(state, fields, index, matches, bufs);
    let next_states = state.transition_on(path, value, field.is_number, bufs);
    for next_state in next_states {
        for m in next_state.matches.borrow().iter() {
            matches.add(m.clone());
        }
        for next_idx in (index + 1)..fields.len() {
            if no_array_trail_conflict_ref(array_trail, fields[next_idx].array_trail_slice()) {
                self.try_to_match_direct(fields, next_idx, &next_state, matches, bufs);
            }
        }
        self.check_exists_false_direct(&next_state, fields, index, matches, bufs);
    }
}
/// Flattened-field variant of `check_exists_false`: follows every
/// `exists:false` edge of `state` whose path does not occur in `fields`.
/// Paths are compared as bytes.
// NOTE(review): the binary search assumes `fields` is sorted by path bytes —
// confirm against the flattener's output order.
fn check_exists_false_direct(
    &self,
    state: &Rc<MutableFieldMatcher<X>>,
    fields: &[crate::flatten_json::Field<'_>],
    index: usize,
    matches: &mut MatchSet<X>,
    bufs: &mut NfaBuffers,
) {
    for (path, target) in state.exists_false.borrow().iter() {
        let present = fields
            .binary_search_by(|f| f.path.as_ref().cmp(path.as_bytes()))
            .is_ok();
        if present {
            continue;
        }
        target
            .matches
            .borrow()
            .iter()
            .for_each(|m| matches.add(m.clone()));
        self.try_to_match_direct(fields, index, target, matches, bufs);
    }
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::json::Matcher;
use crate::regexp::parse_regexp;
/// A regexp containing `+` is merged into the main arena, matches a
/// conforming value and rejects a non-conforming one.
#[test]
fn test_value_matcher_regexp_with_plus() {
    let vm: MutableValueMatcher<String> = MutableValueMatcher::new();
    let matcher = Matcher::ParsedRegexp(parse_regexp("[a-z]+@example~.com").unwrap());
    let expected_next = vm.add_transition(&matcher, 0).unwrap();
    assert!(
        vm.main_arena.borrow().is_some(),
        "main_arena should be set for regexp"
    );

    let mut bufs = NfaBuffers::new();
    let hits = vm.transition_on(&qv(b"alice@example.com"), false, &mut bufs);
    assert_eq!(
        hits.len(),
        1,
        "Should match 'alice@example.com', got {} results",
        hits.len()
    );
    assert!(
        Rc::ptr_eq(&hits[0], &expected_next),
        "Should return the next field matcher"
    );

    bufs.clear();
    let misses = vm.transition_on(&qv(b"alice@exampleXcom"), false, &mut bufs);
    assert!(misses.is_empty(), "Should not match 'alice@exampleXcom'");
}
/// Adds the parsed regexp `[abc]` to a fresh value matcher and expects the quoted value `a`
/// to yield exactly the field matcher returned by `add_transition`.
#[test]
fn test_value_matcher_regexp_without_plus() {
    let value_matcher: MutableValueMatcher<String> = MutableValueMatcher::new();
    let char_class = Matcher::ParsedRegexp(parse_regexp("[abc]").unwrap());
    let target = value_matcher.add_transition(&char_class, 0).unwrap();
    assert!(
        value_matcher.main_arena.borrow().is_some(),
        "main_arena should be set for regexp"
    );

    let mut bufs = NfaBuffers::new();
    let input = qv(b"a");
    let hits = value_matcher.transition_on(&input, false, &mut bufs);
    assert_eq!(hits.len(), 1, "Should match 'a'");
    assert!(
        Rc::ptr_eq(&hits[0], &target),
        "Should return the next field matcher"
    );
}
/// Registers a JSON pattern with a `regex` matcher on `email` and checks, through
/// `matches_for_fields`, that a conforming address matches `p1` and a non-conforming one
/// matches nothing.
#[test]
fn test_core_matcher_with_arena_regexp() {
    // Builds a single-field event carrying the given (already quoted) email value.
    let email_event = |value: &str| {
        vec![EventField {
            path: "email".to_string(),
            value: value.to_string(),
            array_trail: vec![],
            is_number: false,
        }]
    };

    let core: CoreMatcher<String> = CoreMatcher::new();
    let parsed = crate::json::parse_pattern(
        r#"{"email": [{"regex": "[a-z]+@example~.com"}]}"#,
        &crate::PatternLimits::default(),
    )
    .unwrap();
    let pattern_fields: Vec<_> = parsed.into_iter().collect();
    core.add_pattern("p1".to_string(), &pattern_fields).unwrap();

    let matching_event = email_event("\"alice@example.com\"");
    let matched = core.matches_for_fields(&matching_event);
    assert_eq!(matched, vec!["p1".to_string()], "Should match the pattern");

    let other_event = email_event("\"alice@exampleXcom\"");
    let unmatched = core.matches_for_fields(&other_event);
    assert!(unmatched.is_empty(), "Should not match");
}
/// Same regexp pattern as the owned-field test, but matched through
/// `matches_for_fields_direct` with a borrowed `flatten_json::Field`.
#[test]
fn test_core_matcher_direct_with_arena_regexp() {
    use crate::flatten_json::{Field, FieldValue};
    use std::sync::Arc;

    let core: CoreMatcher<String> = CoreMatcher::new();
    let parsed = crate::json::parse_pattern(
        r#"{"email": [{"regex": "[a-z]+@example~.com"}]}"#,
        &crate::PatternLimits::default(),
    )
    .unwrap();
    let pattern_fields: Vec<_> = parsed.into_iter().collect();
    core.add_pattern("p1".to_string(), &pattern_fields).unwrap();

    let event = vec![Field {
        path: Arc::from(b"email".as_slice()),
        val: FieldValue::Borrowed(b"\"alice@example.com\""),
        array_trail: [].as_slice().into(),
        is_number: false,
    }];
    let mut bufs = NfaBuffers::new();
    let matched = core.matches_for_fields_direct(&event, &mut bufs);
    assert_eq!(
        matched,
        vec!["p1".to_string()],
        "Should match the pattern via matches_for_fields_direct"
    );
}
/// Returns a copy of `s` wrapped in a leading and a trailing double-quote byte.
fn qv(s: &[u8]) -> Vec<u8> {
    std::iter::once(b'"')
        .chain(s.iter().copied())
        .chain(std::iter::once(b'"'))
        .collect()
}
/// A single `Exact("\"hello\"")` transition: the quoted value `hello` reaches the returned
/// field matcher and the quoted value `world` reaches nothing.
#[test]
fn test_arena_migration_string_single() {
    let value_matcher: MutableValueMatcher<String> = MutableValueMatcher::new();
    let target = value_matcher
        .add_transition(&Matcher::Exact("\"hello\"".to_string()), 0)
        .unwrap();
    let mut bufs = NfaBuffers::new();

    let hit = value_matcher.transition_on(&qv(b"hello"), false, &mut bufs);
    assert_eq!(hit.len(), 1);
    assert!(Rc::ptr_eq(&hit[0], &target));

    bufs.clear();
    let miss = value_matcher.transition_on(&qv(b"world"), false, &mut bufs);
    assert!(miss.is_empty());
}
/// Three `Exact` transitions (`foo`, `bar`, `baz`) on one value matcher: each quoted value
/// reaches its own field matcher, and the quoted value `qux` reaches nothing.
#[test]
fn test_arena_migration_string_multiple() {
    let value_matcher: MutableValueMatcher<String> = MutableValueMatcher::new();
    let add_exact = |quoted: &str| {
        value_matcher
            .add_transition(&Matcher::Exact(quoted.to_string()), 0)
            .unwrap()
    };
    // Array elements are evaluated left to right, so transitions are added as foo, bar, baz.
    let expectations = [
        (&b"foo"[..], add_exact("\"foo\"")),
        (&b"bar"[..], add_exact("\"bar\"")),
        (&b"baz"[..], add_exact("\"baz\"")),
    ];

    let mut bufs = NfaBuffers::new();
    for (raw, target) in expectations.iter() {
        let results = value_matcher.transition_on(&qv(raw), false, &mut bufs);
        assert_eq!(results.len(), 1);
        assert!(Rc::ptr_eq(&results[0], target));
        bufs.clear();
    }

    let results = value_matcher.transition_on(&qv(b"qux"), false, &mut bufs);
    assert!(results.is_empty());
}
/// A `Prefix("hello")` transition: the quoted values `hello`, `helloworld` and `hello123`
/// each give one result, while `hell` and `world` give none.
#[test]
fn test_arena_migration_prefix() {
    let value_matcher: MutableValueMatcher<String> = MutableValueMatcher::new();
    let target = value_matcher
        .add_transition(&Matcher::Prefix("hello".to_string()), 0)
        .unwrap();
    let mut bufs = NfaBuffers::new();

    // The bare prefix must lead to the state returned by `add_transition`.
    let bare = value_matcher.transition_on(&qv(b"hello"), false, &mut bufs);
    assert_eq!(bare.len(), 1);
    assert!(Rc::ptr_eq(&bare[0], &target));

    let accepted: [&[u8]; 2] = [b"helloworld", b"hello123"];
    for input in accepted.iter() {
        bufs.clear();
        let results = value_matcher.transition_on(&qv(input), false, &mut bufs);
        assert_eq!(results.len(), 1);
    }

    let rejected: [&[u8]; 2] = [b"hell", b"world"];
    for input in rejected.iter() {
        bufs.clear();
        let results = value_matcher.transition_on(&qv(input), false, &mut bufs);
        assert!(results.is_empty());
    }
}
/// A `Shellstyle("hello*world")` transition: `helloworld`, `hello_world` and `hello123world`
/// (quoted) each give one result, while `helloworl` and `worldhello` give none.
#[test]
fn test_arena_migration_shellstyle() {
    let value_matcher: MutableValueMatcher<String> = MutableValueMatcher::new();
    let target = value_matcher
        .add_transition(&Matcher::Shellstyle("hello*world".to_string()), 0)
        .unwrap();
    let mut bufs = NfaBuffers::new();

    let adjacent = value_matcher.transition_on(&qv(b"helloworld"), false, &mut bufs);
    assert_eq!(adjacent.len(), 1);
    assert!(Rc::ptr_eq(&adjacent[0], &target));

    let accepted: [&[u8]; 2] = [b"hello_world", b"hello123world"];
    for input in accepted.iter() {
        bufs.clear();
        let results = value_matcher.transition_on(&qv(input), false, &mut bufs);
        assert_eq!(results.len(), 1);
    }

    let rejected: [&[u8]; 2] = [b"helloworl", b"worldhello"];
    for input in rejected.iter() {
        bufs.clear();
        let results = value_matcher.transition_on(&qv(input), false, &mut bufs);
        assert!(results.is_empty());
    }
}
/// A `Wildcard` pattern with an escaped star (`foo\*bar`): the quoted literal `foo*bar`
/// reaches the returned field matcher and the quoted value `foobar` reaches nothing.
#[test]
fn test_arena_migration_wildcard_escape() {
    let value_matcher: MutableValueMatcher<String> = MutableValueMatcher::new();
    let escaped_star = Matcher::Wildcard("foo\\*bar".to_string());
    let target = value_matcher.add_transition(&escaped_star, 0).unwrap();
    let mut bufs = NfaBuffers::new();

    let literal_star = value_matcher.transition_on(&qv(b"foo*bar"), false, &mut bufs);
    assert_eq!(literal_star.len(), 1);
    assert!(Rc::ptr_eq(&literal_star[0], &target));

    bufs.clear();
    let no_star = value_matcher.transition_on(&qv(b"foobar"), false, &mut bufs);
    assert!(no_star.is_empty());
}
/// An `AnythingBut(["foo", "bar"])` transition: the quoted values `baz` and `qux` each give
/// one result, while the excluded values `foo` and `bar` give none.
#[test]
fn test_arena_migration_anything_but() {
    let value_matcher: MutableValueMatcher<String> = MutableValueMatcher::new();
    let excluded = vec!["foo".to_string(), "bar".to_string()];
    let target = value_matcher
        .add_transition(&Matcher::AnythingBut(excluded), 0)
        .unwrap();
    let mut bufs = NfaBuffers::new();

    let first = value_matcher.transition_on(&qv(b"baz"), false, &mut bufs);
    assert_eq!(first.len(), 1);
    assert!(Rc::ptr_eq(&first[0], &target));

    bufs.clear();
    let second = value_matcher.transition_on(&qv(b"qux"), false, &mut bufs);
    assert_eq!(second.len(), 1);

    let rejected: [&[u8]; 2] = [b"foo", b"bar"];
    for input in rejected.iter() {
        bufs.clear();
        let results = value_matcher.transition_on(&qv(input), false, &mut bufs);
        assert!(results.is_empty());
    }
}
/// An `EqualsIgnoreCase("Hello")` transition: `Hello`, `hello`, `HELLO` and `hElLo` (quoted)
/// each give one result, while `world` gives none.
#[test]
fn test_arena_migration_monocase() {
    let value_matcher: MutableValueMatcher<String> = MutableValueMatcher::new();
    let target = value_matcher
        .add_transition(&Matcher::EqualsIgnoreCase("Hello".to_string()), 0)
        .unwrap();
    let mut bufs = NfaBuffers::new();

    let same_case = value_matcher.transition_on(&qv(b"Hello"), false, &mut bufs);
    assert_eq!(same_case.len(), 1);
    assert!(Rc::ptr_eq(&same_case[0], &target));

    let other_casings: [&[u8]; 3] = [b"hello", b"HELLO", b"hElLo"];
    for input in other_casings.iter() {
        bufs.clear();
        let results = value_matcher.transition_on(&qv(input), false, &mut bufs);
        assert_eq!(results.len(), 1);
    }

    bufs.clear();
    let unrelated = value_matcher.transition_on(&qv(b"world"), false, &mut bufs);
    assert!(unrelated.is_empty());
}
/// A `Numeric` comparison with `lower: (true, 10.0)` and `upper: (false, 100.0)`: the number
/// values `10`, `50` and `99` each give one result, while `9` and `100` give none.
#[test]
fn test_arena_migration_numeric_range() {
    use crate::json::NumericComparison;

    let value_matcher: MutableValueMatcher<String> = MutableValueMatcher::new();
    let range = Matcher::Numeric(NumericComparison {
        lower: Some((true, 10.0)),
        upper: Some((false, 100.0)),
    });
    let target = value_matcher.add_transition(&range, 0).unwrap();
    let mut bufs = NfaBuffers::new();

    let at_lower = value_matcher.transition_on(b"10", true, &mut bufs);
    assert_eq!(at_lower.len(), 1);
    assert!(Rc::ptr_eq(&at_lower[0], &target));

    let inside: [&[u8]; 2] = [b"50", b"99"];
    for input in inside.iter() {
        bufs.clear();
        let results = value_matcher.transition_on(*input, true, &mut bufs);
        assert_eq!(results.len(), 1);
    }

    let outside: [&[u8]; 2] = [b"9", b"100"];
    for input in outside.iter() {
        bufs.clear();
        let results = value_matcher.transition_on(*input, true, &mut bufs);
        assert!(results.is_empty());
    }
}
/// Exact, prefix and shellstyle transitions share one value matcher: each dedicated input
/// reaches only its own field matcher, and `prewild` reaches at least one state.
#[test]
fn test_arena_migration_mixed_patterns() {
    let value_matcher: MutableValueMatcher<String> = MutableValueMatcher::new();
    let exact_target = value_matcher
        .add_transition(&Matcher::Exact("\"exact\"".to_string()), 0)
        .unwrap();
    let prefix_target = value_matcher
        .add_transition(&Matcher::Prefix("pre".to_string()), 0)
        .unwrap();
    let shell_target = value_matcher
        .add_transition(&Matcher::Shellstyle("*wild*".to_string()), 0)
        .unwrap();

    let expectations = [
        (&b"exact"[..], &exact_target),
        (&b"prefix_value"[..], &prefix_target),
        (&b"something_wild_here"[..], &shell_target),
    ];
    let mut bufs = NfaBuffers::new();
    for (raw, target) in expectations.iter() {
        let results = value_matcher.transition_on(&qv(raw), false, &mut bufs);
        assert_eq!(results.len(), 1);
        assert!(Rc::ptr_eq(&results[0], *target));
        bufs.clear();
    }

    let overlapping = value_matcher.transition_on(&qv(b"prewild"), false, &mut bufs);
    assert!(!overlapping.is_empty());
}
/// A `Cidr` transition for network `192.168.1.0` with prefix length 24: `192.168.1.1` and
/// `192.168.1.255` (quoted) each give one result, while `192.168.2.1` and `10.0.0.1` give none.
#[test]
#[cfg_attr(miri, ignore)]
fn test_arena_migration_cidr_v4() {
    use crate::json::CidrPattern;

    let value_matcher: MutableValueMatcher<String> = MutableValueMatcher::new();
    let subnet = Matcher::Cidr(CidrPattern::V4 {
        network: [192, 168, 1, 0],
        prefix_len: 24,
    });
    let target = value_matcher.add_transition(&subnet, 0).unwrap();
    let mut bufs = NfaBuffers::new();

    let low_host = value_matcher.transition_on(&qv(b"192.168.1.1"), false, &mut bufs);
    assert_eq!(low_host.len(), 1);
    assert!(Rc::ptr_eq(&low_host[0], &target));

    bufs.clear();
    let high_host = value_matcher.transition_on(&qv(b"192.168.1.255"), false, &mut bufs);
    assert_eq!(high_host.len(), 1);

    let outside: [&[u8]; 2] = [b"192.168.2.1", b"10.0.0.1"];
    for address in outside.iter() {
        bufs.clear();
        let results = value_matcher.transition_on(&qv(address), false, &mut bufs);
        assert!(results.is_empty());
    }
}
/// Registers an exact, a prefix and a shellstyle pattern on the same `field` path and checks
/// that a dedicated event value makes each pattern name appear in the match list.
#[test]
fn test_arena_migration_core_matcher_all_types() {
    let core: CoreMatcher<String> = CoreMatcher::new();
    let rules = vec![
        ("exact", Matcher::Exact("\"hello\"".to_string())),
        ("prefix", Matcher::Prefix("pre".to_string())),
        ("shell", Matcher::Shellstyle("*wild*".to_string())),
    ];
    for (name, matcher) in rules {
        core.add_pattern(name.to_string(), &[("field".to_string(), vec![matcher])])
            .unwrap();
    }

    let cases = [
        ("\"hello\"", "exact"),
        ("\"prefix_value\"", "prefix"),
        ("\"something_wild_here\"", "shell"),
    ];
    for (value, expected) in cases.iter() {
        let event = vec![EventField {
            path: "field".to_string(),
            value: value.to_string(),
            array_trail: vec![],
            is_number: false,
        }];
        let matched = core.matches_for_fields(&event);
        assert!(matched.contains(&expected.to_string()));
    }
}
/// Adding `a, b, a, c, b` to a `MatchSet` must produce the vector `["a", "b", "c"]`.
#[test]
fn test_match_set_dedup() {
    let mut set = MatchSet::<String>::new();
    for item in ["a", "b", "a", "c", "b"].iter() {
        set.add(item.to_string());
    }
    let deduped = set.into_vec();
    assert_eq!(deduped.len(), 3);
    assert_eq!(deduped, vec!["a", "b", "c"]);
}
/// Builds a matcher holding pattern `p1`, which requires `level` to equal `"high"` and
/// `status` to equal `"active"`.
fn cm_two_field() -> CoreMatcher<String> {
    let matcher: CoreMatcher<String> = CoreMatcher::new();
    let exact = |quoted: &str| vec![Matcher::Exact(quoted.to_string())];
    let conditions = [
        ("level".to_string(), exact("\"high\"")),
        ("status".to_string(), exact("\"active\"")),
    ];
    matcher.add_pattern("p1".to_string(), &conditions).unwrap();
    matcher
}
/// Builds a matcher holding pattern `p1`, whose only condition is `Exists(false)` on `gone`.
fn cm_exists_false() -> CoreMatcher<String> {
    let matcher: CoreMatcher<String> = CoreMatcher::new();
    let conditions = [("gone".to_string(), vec![Matcher::Exists(false)])];
    matcher.add_pattern("p1".to_string(), &conditions).unwrap();
    matcher
}
/// With owned fields: `level` and `status` in different positions of array 1 must not match
/// `p1`, while both in position 0 must match.
#[test]
fn test_core_matcher_array_trail_conflict() {
    use crate::json::ArrayPos;

    // `level` always sits at position 0 of array 1; only the position of `status` varies.
    let event = |status_pos| {
        vec![
            EventField {
                path: "level".to_string(),
                value: "\"high\"".to_string(),
                array_trail: vec![ArrayPos { array: 1, pos: 0 }],
                is_number: false,
            },
            EventField {
                path: "status".to_string(),
                value: "\"active\"".to_string(),
                array_trail: vec![ArrayPos {
                    array: 1,
                    pos: status_pos,
                }],
                is_number: false,
            },
        ]
    };

    let core = cm_two_field();
    let conflicting = event(1);
    assert!(core.matches_for_fields(&conflicting).is_empty());
    let compatible = event(0);
    assert_eq!(core.matches_for_fields(&compatible), vec!["p1"]);
}
/// With owned fields: both `level` and `status` present matches `p1`; `status` alone does not.
#[test]
fn test_core_matcher_multi_field_owned() {
    let field = |path: &str, value: &str| EventField {
        path: path.to_string(),
        value: value.to_string(),
        array_trail: vec![],
        is_number: false,
    };

    let core = cm_two_field();
    let both = vec![field("level", "\"high\""), field("status", "\"active\"")];
    assert_eq!(core.matches_for_fields(&both), vec!["p1"]);

    let status_only = vec![field("status", "\"active\"")];
    assert!(core.matches_for_fields(&status_only).is_empty());
}
/// With borrowed `EventFieldRef`s: both `level` and `status` present matches `p1`; `status`
/// alone does not.
#[test]
fn test_core_matcher_ref_multi_field() {
    let field = |path: &'static str, value: &'static [u8]| EventFieldRef {
        path,
        value,
        array_trail: &[],
        is_number: false,
    };

    let core = cm_two_field();
    let mut bufs = NfaBuffers::new();

    let both = vec![field("level", b"\"high\""), field("status", b"\"active\"")];
    assert_eq!(core.matches_for_fields_ref(&both, &mut bufs), vec!["p1"]);

    let status_only = vec![field("status", b"\"active\"")];
    assert!(core
        .matches_for_fields_ref(&status_only, &mut bufs)
        .is_empty());
}
/// With borrowed `EventFieldRef`s: `level` and `status` in different positions of array 1
/// must not match `p1`, while both in position 0 must match.
#[test]
fn test_core_matcher_ref_array_trail_conflict() {
    use crate::flatten_json::ArrayPos;

    let core = cm_two_field();
    let mut bufs = NfaBuffers::new();
    let first_element = [ArrayPos { array: 1, pos: 0 }];
    let second_element = [ArrayPos { array: 1, pos: 1 }];

    // `level` always uses the first element's trail; only the trail of `status` varies.
    let event = |status_trail| {
        vec![
            EventFieldRef {
                path: "level",
                value: b"\"high\"",
                array_trail: &first_element,
                is_number: false,
            },
            EventFieldRef {
                path: "status",
                value: b"\"active\"",
                array_trail: status_trail,
                is_number: false,
            },
        ]
    };

    let conflicting = event(&second_element);
    assert!(
        core.matches_for_fields_ref(&conflicting, &mut bufs)
            .is_empty()
    );

    let compatible = event(&first_element);
    assert_eq!(
        core.matches_for_fields_ref(&compatible, &mut bufs),
        vec!["p1"]
    );
}
/// With borrowed `EventFieldRef`s: the `Exists(false)` pattern on `gone` matches an event
/// that only has `other`, and does not match an event that has `gone`.
#[test]
fn test_core_matcher_ref_exists_false() {
    let field = |path: &'static str, value: &'static [u8]| EventFieldRef {
        path,
        value,
        array_trail: &[],
        is_number: false,
    };

    let core = cm_exists_false();
    let mut bufs = NfaBuffers::new();

    let field_absent = vec![field("other", b"123")];
    assert_eq!(
        core.matches_for_fields_ref(&field_absent, &mut bufs),
        vec!["p1"]
    );

    let field_present = vec![field("gone", b"here")];
    assert!(core
        .matches_for_fields_ref(&field_present, &mut bufs)
        .is_empty());
}
/// With `flatten_json::Field`s: both `level` and `status` present matches `p1`; `status`
/// alone does not.
#[test]
fn test_core_matcher_direct_multi_field() {
    use crate::flatten_json::{Field, FieldValue};
    use std::sync::Arc;

    let field = |path: &[u8], value: &'static [u8]| Field {
        path: Arc::from(path),
        val: FieldValue::Borrowed(value),
        array_trail: [].as_slice().into(),
        is_number: false,
    };

    let core = cm_two_field();
    let mut bufs = NfaBuffers::new();

    let both = vec![field(b"level", b"\"high\""), field(b"status", b"\"active\"")];
    assert_eq!(core.matches_for_fields_direct(&both, &mut bufs), vec!["p1"]);

    let status_only = vec![field(b"status", b"\"active\"")];
    assert!(core
        .matches_for_fields_direct(&status_only, &mut bufs)
        .is_empty());
}
/// With `flatten_json::Field`s: `level` and `status` in different positions of array 1 must
/// not match `p1`, while both in position 0 must match.
#[test]
fn test_core_matcher_direct_array_trail_conflict() {
    use crate::flatten_json::{ArrayPos, ArrayTrailVec, Field, FieldValue};
    use std::sync::Arc;

    let core = cm_two_field();
    let mut bufs = NfaBuffers::new();
    let first_element: ArrayTrailVec = [ArrayPos { array: 1, pos: 0 }].as_slice().into();
    let second_element: ArrayTrailVec = [ArrayPos { array: 1, pos: 1 }].as_slice().into();

    // `level` always uses the first element's trail; only the trail of `status` varies.
    let event = |status_trail: ArrayTrailVec| {
        vec![
            Field {
                path: Arc::from(b"level".as_slice()),
                val: FieldValue::Borrowed(b"\"high\""),
                array_trail: first_element.clone(),
                is_number: false,
            },
            Field {
                path: Arc::from(b"status".as_slice()),
                val: FieldValue::Borrowed(b"\"active\""),
                array_trail: status_trail,
                is_number: false,
            },
        ]
    };

    let conflicting = event(second_element);
    assert!(
        core.matches_for_fields_direct(&conflicting, &mut bufs)
            .is_empty()
    );

    let compatible = event(first_element.clone());
    assert_eq!(
        core.matches_for_fields_direct(&compatible, &mut bufs),
        vec!["p1"]
    );
}
/// Builds a matcher holding pattern `p1`, which lists the condition `a == "1"` twice.
fn cm_same_field_twice() -> CoreMatcher<String> {
    let matcher: CoreMatcher<String> = CoreMatcher::new();
    let a_is_one = || ("a".to_string(), vec![Matcher::Exact("\"1\"".to_string())]);
    let conditions = [a_is_one(), a_is_one()];
    matcher.add_pattern("p1".to_string(), &conditions).unwrap();
    matcher
}
/// With owned fields: a single `a == "1"` field must not satisfy the doubled condition of
/// `p1`, while two such fields must.
#[test]
fn test_core_matcher_no_self_match_owned() {
    let a_one = || EventField {
        path: "a".to_string(),
        value: "\"1\"".to_string(),
        array_trail: vec![],
        is_number: false,
    };

    let core = cm_same_field_twice();
    let single = vec![a_one()];
    assert!(
        core.matches_for_fields(&single).is_empty(),
        "single field must not self-match a two-condition pattern"
    );

    let two = vec![a_one(), a_one()];
    assert_eq!(core.matches_for_fields(&two), vec!["p1"]);
}
/// With borrowed `EventFieldRef`s: a single `a == "1"` field must not satisfy the doubled
/// condition of `p1`, while two such fields must.
#[test]
fn test_core_matcher_no_self_match_ref() {
    let a_one = || EventFieldRef {
        path: "a",
        value: b"\"1\"",
        array_trail: &[],
        is_number: false,
    };

    let core = cm_same_field_twice();
    let mut bufs = NfaBuffers::new();

    let single = vec![a_one()];
    assert!(
        core.matches_for_fields_ref(&single, &mut bufs).is_empty(),
        "single field must not self-match"
    );

    let two = vec![a_one(), a_one()];
    assert_eq!(core.matches_for_fields_ref(&two, &mut bufs), vec!["p1"]);
}
/// With `flatten_json::Field`s: a single `a == "1"` field must not satisfy the doubled
/// condition of `p1`, while two such fields must.
#[test]
fn test_core_matcher_no_self_match_direct() {
    use crate::flatten_json::{Field, FieldValue};
    use std::sync::Arc;

    let a_one = || Field {
        path: Arc::from(b"a".as_slice()),
        val: FieldValue::Borrowed(b"\"1\""),
        array_trail: [].as_slice().into(),
        is_number: false,
    };

    let core = cm_same_field_twice();
    let mut bufs = NfaBuffers::new();

    let single = vec![a_one()];
    assert!(
        core.matches_for_fields_direct(&single, &mut bufs).is_empty(),
        "single field must not self-match"
    );

    let two = vec![a_one(), a_one()];
    assert_eq!(core.matches_for_fields_direct(&two, &mut bufs), vec!["p1"]);
}
/// Builds a matcher holding pattern `p1`, which lists `Exists(true)` on `a` followed by the
/// condition `a == "1"`.
fn cm_exists_true_then_value() -> CoreMatcher<String> {
    let matcher: CoreMatcher<String> = CoreMatcher::new();
    let present = ("a".to_string(), vec![Matcher::Exists(true)]);
    let equals_one = ("a".to_string(), vec![Matcher::Exact("\"1\"".to_string())]);
    let conditions = [present, equals_one];
    matcher.add_pattern("p1".to_string(), &conditions).unwrap();
    matcher
}
/// With owned fields: one `a == "1"` field must not satisfy both the `Exists(true)` and the
/// value condition of `p1`, while two such fields must.
#[test]
fn test_core_matcher_exists_true_no_self_match_owned() {
    let a_one = || EventField {
        path: "a".to_string(),
        value: "\"1\"".to_string(),
        array_trail: vec![],
        is_number: false,
    };

    let core = cm_exists_true_then_value();
    let single = vec![a_one()];
    assert!(
        core.matches_for_fields(&single).is_empty(),
        "single field must not satisfy exists:true AND value on the same field"
    );

    let two = vec![a_one(), a_one()];
    assert_eq!(core.matches_for_fields(&two), vec!["p1"]);
}
/// With borrowed `EventFieldRef`s: one `a == "1"` field must not satisfy both the
/// `Exists(true)` and the value condition of `p1`, while two such fields must.
#[test]
fn test_core_matcher_exists_true_no_self_match_ref() {
    let a_one = || EventFieldRef {
        path: "a",
        value: b"\"1\"",
        array_trail: &[],
        is_number: false,
    };

    let core = cm_exists_true_then_value();
    let mut bufs = NfaBuffers::new();

    let single = vec![a_one()];
    assert!(
        core.matches_for_fields_ref(&single, &mut bufs).is_empty(),
        "single field must not satisfy exists:true AND value"
    );

    let two = vec![a_one(), a_one()];
    assert_eq!(core.matches_for_fields_ref(&two, &mut bufs), vec!["p1"]);
}
/// With `flatten_json::Field`s: one `a == "1"` field must not satisfy both the `Exists(true)`
/// and the value condition of `p1`, while two such fields must.
#[test]
fn test_core_matcher_exists_true_no_self_match_direct() {
    use crate::flatten_json::{Field, FieldValue};
    use std::sync::Arc;

    let a_one = || Field {
        path: Arc::from(b"a".as_slice()),
        val: FieldValue::Borrowed(b"\"1\""),
        array_trail: [].as_slice().into(),
        is_number: false,
    };

    let core = cm_exists_true_then_value();
    let mut bufs = NfaBuffers::new();

    let single = vec![a_one()];
    assert!(
        core.matches_for_fields_direct(&single, &mut bufs).is_empty(),
        "single field must not satisfy exists:true AND value"
    );

    let two = vec![a_one(), a_one()];
    assert_eq!(core.matches_for_fields_direct(&two, &mut bufs), vec!["p1"]);
}
/// With `flatten_json::Field`s: the `Exists(false)` pattern on `gone` matches an event that
/// only has `other`, and does not match an event that has `gone`.
#[test]
fn test_core_matcher_direct_exists_false() {
    use crate::flatten_json::{Field, FieldValue};
    use std::sync::Arc;

    let field = |path: &[u8], value: &'static [u8]| Field {
        path: Arc::from(path),
        val: FieldValue::Borrowed(value),
        array_trail: [].as_slice().into(),
        is_number: false,
    };

    let core = cm_exists_false();
    let mut bufs = NfaBuffers::new();

    let field_absent = vec![field(b"other", b"123")];
    assert_eq!(
        core.matches_for_fields_direct(&field_absent, &mut bufs),
        vec!["p1"]
    );

    let field_present = vec![field(b"gone", b"here")];
    assert!(core
        .matches_for_fields_direct(&field_present, &mut bufs)
        .is_empty());
}
/// A single exact pattern on `value`: the event value `"hello"` matches `p1` and `"world"`
/// matches nothing.
#[test]
fn test_add_string_transition_basic() {
    let event = |quoted: &str| {
        vec![EventField {
            path: "value".to_string(),
            value: quoted.to_string(),
            array_trail: vec![],
            is_number: false,
        }]
    };

    let core: CoreMatcher<String> = CoreMatcher::new();
    let conditions = [(
        "value".to_string(),
        vec![Matcher::Exact("\"hello\"".to_string())],
    )];
    core.add_pattern("p1".to_string(), &conditions).unwrap();

    let expected = event("\"hello\"");
    assert_eq!(core.matches_for_fields(&expected), vec!["p1".to_string()]);

    let wrong = event("\"world\"");
    assert!(core.matches_for_fields(&wrong).is_empty());
}
/// One pattern with two exact alternatives on `color`: `"red"` and `"blue"` each match `p1`,
/// while `"green"` matches nothing.
#[test]
fn test_add_string_transitions_bulk() {
    let event = |quoted: &str| {
        vec![EventField {
            path: "color".to_string(),
            value: quoted.to_string(),
            array_trail: vec![],
            is_number: false,
        }]
    };

    let core: CoreMatcher<String> = CoreMatcher::new();
    let alternatives = vec![
        Matcher::Exact("\"red\"".to_string()),
        Matcher::Exact("\"blue\"".to_string()),
    ];
    core.add_pattern("p1".to_string(), &[("color".to_string(), alternatives)])
        .unwrap();

    for accepted in ["\"red\"", "\"blue\""].iter() {
        let fields = event(accepted);
        assert_eq!(core.matches_for_fields(&fields), vec!["p1".to_string()]);
    }

    let green = event("\"green\"");
    assert!(core.matches_for_fields(&green).is_empty());
}
/// A `NumericExact(42.0)` pattern on `count`: the number value `42` matches `p1` and `43`
/// matches nothing.
#[test]
fn test_add_numeric_transition() {
    let event = |digits: &str| {
        vec![EventField {
            path: "count".to_string(),
            value: digits.to_string(),
            array_trail: vec![],
            is_number: true,
        }]
    };

    let core: CoreMatcher<String> = CoreMatcher::new();
    let conditions = [("count".to_string(), vec![Matcher::NumericExact(42.0)])];
    core.add_pattern("p1".to_string(), &conditions).unwrap();

    let exact = event("42");
    assert_eq!(core.matches_for_fields(&exact), vec!["p1".to_string()]);

    let different = event("43");
    assert!(core.matches_for_fields(&different).is_empty());
}
/// A `Prefix("active")` pattern on `status`: `"active-now"` matches `p1` and `"inactive"`
/// matches nothing.
#[test]
fn test_add_prefix_transition() {
    let event = |quoted: &str| {
        vec![EventField {
            path: "status".to_string(),
            value: quoted.to_string(),
            array_trail: vec![],
            is_number: false,
        }]
    };

    let core: CoreMatcher<String> = CoreMatcher::new();
    let conditions = [(
        "status".to_string(),
        vec![Matcher::Prefix("active".to_string())],
    )];
    core.add_pattern("p1".to_string(), &conditions).unwrap();

    let with_prefix = event("\"active-now\"");
    assert_eq!(
        core.matches_for_fields(&with_prefix),
        vec!["p1".to_string()]
    );

    let without_prefix = event("\"inactive\"");
    assert!(core.matches_for_fields(&without_prefix).is_empty());
}
/// A `Suffix("json")` pattern on `path`: `"data.json"` matches `p1` and `"data.txt"` matches
/// nothing.
#[test]
fn test_add_suffix_transition() {
    let event = |quoted: &str| {
        vec![EventField {
            path: "path".to_string(),
            value: quoted.to_string(),
            array_trail: vec![],
            is_number: false,
        }]
    };

    let core: CoreMatcher<String> = CoreMatcher::new();
    let conditions = [(
        "path".to_string(),
        vec![Matcher::Suffix("json".to_string())],
    )];
    core.add_pattern("p1".to_string(), &conditions).unwrap();

    let with_suffix = event("\"data.json\"");
    assert_eq!(
        core.matches_for_fields(&with_suffix),
        vec!["p1".to_string()]
    );

    let without_suffix = event("\"data.txt\"");
    assert!(core.matches_for_fields(&without_suffix).is_empty());
}
/// A `Wildcard("*ello")` pattern on `name`: `"hello"` matches `p1` and `"world"` matches
/// nothing.
#[test]
fn test_add_wildcard_transition() {
    let event = |quoted: &str| {
        vec![EventField {
            path: "name".to_string(),
            value: quoted.to_string(),
            array_trail: vec![],
            is_number: false,
        }]
    };

    let core: CoreMatcher<String> = CoreMatcher::new();
    let conditions = [(
        "name".to_string(),
        vec![Matcher::Wildcard("*ello".to_string())],
    )];
    core.add_pattern("p1".to_string(), &conditions).unwrap();

    let accepted = event("\"hello\"");
    assert_eq!(core.matches_for_fields(&accepted), vec!["p1".to_string()]);

    let rejected = event("\"world\"");
    assert!(core.matches_for_fields(&rejected).is_empty());
}
/// Adding the same string transition twice must hand back the very same `Rc` allocation.
#[test]
fn test_add_string_transition_singleton_opt() {
    let value_matcher: MutableValueMatcher<String> = MutableValueMatcher::new();
    let first = value_matcher.add_string_transition(b"hello", 0).unwrap();
    let second = value_matcher.add_string_transition(b"hello", 0).unwrap();
    assert!(Rc::ptr_eq(&first, &second));
}
/// `has_numbers` is false on a fresh value matcher and true after `add_numeric_transition`.
#[test]
fn test_add_numeric_has_numbers_flag() {
    let value_matcher: MutableValueMatcher<String> = MutableValueMatcher::new();
    let flag_before = value_matcher.has_numbers.get();
    assert!(!flag_before);

    value_matcher.add_numeric_transition(42.0, 0).unwrap();

    let flag_after = value_matcher.has_numbers.get();
    assert!(
        flag_after,
        "numeric flag should be set after adding numeric"
    );
}
/// `maybe_q_number` yields a value only when `has_numbers` is set and the input is flagged as
/// a number; the other combinations exercised here yield nothing.
#[test]
fn test_maybe_q_number_requires_both_has_numbers_and_is_number() {
    let value_matcher: MutableValueMatcher<String> = MutableValueMatcher::new();

    // Flag not set yet: nothing is produced whether or not the input is a number.
    let number_without_flag = value_matcher.maybe_q_number(b"123", true).is_none();
    assert!(number_without_flag);
    let text_without_flag = value_matcher.maybe_q_number(b"abc", false).is_none();
    assert!(text_without_flag);

    value_matcher.has_numbers.set(true);

    let text_with_flag = value_matcher.maybe_q_number(b"abc", false).is_none();
    assert!(text_with_flag);
    let number_with_flag = value_matcher.maybe_q_number(b"123", true).is_some();
    assert!(number_with_flag);
}
/// For all-`Exact` matcher lists of three, two and one values, `add_transition` on a fresh
/// field matcher returns exactly one next state.
#[test]
fn test_add_transition_all_exact_returns_one_shared_next_state() {
    // Adds the given quoted values as `Exact` matchers on path "p" of a fresh field matcher
    // and reports how many next states came back.
    let next_state_count = |quoted_values: &[&str]| {
        let field_matcher: Rc<MutableFieldMatcher<String>> = Rc::new(MutableFieldMatcher::new());
        let matchers: Vec<_> = quoted_values
            .iter()
            .map(|quoted| Matcher::Exact(quoted.to_string()))
            .collect();
        let count = field_matcher
            .add_transition("p", &matchers, 0)
            .unwrap()
            .len();
        count
    };

    assert_eq!(next_state_count(&["\"a\"", "\"b\"", "\"c\""]), 1);
    assert_eq!(next_state_count(&["\"x\"", "\"y\""]), 1);
    assert_eq!(next_state_count(&["\"only\""]), 1);
}
/// Mixing an `Exact` and a `NumericExact` matcher yields two next states, and the value
/// matcher stored under path "p" has `has_numbers` set.
#[test]
fn test_add_transition_mixed_exact_and_numeric_keeps_numeric_path() {
    let field_matcher: Rc<MutableFieldMatcher<String>> = Rc::new(MutableFieldMatcher::new());
    let mixed = vec![
        Matcher::Exact("\"a\"".to_string()),
        Matcher::NumericExact(42.0),
    ];
    let next_state_count = field_matcher.add_transition("p", &mixed, 0).unwrap().len();
    assert_eq!(next_state_count, 2);

    let by_path = field_matcher.transitions.borrow();
    let value_matcher = by_path.get("p").expect("value matcher present");
    assert!(value_matcher.has_numbers.get());
}
/// Two prefix patterns on `verb` (`re` as `p1`, `un` as `p2`): `"replace"` matches only `p1`
/// and `"undo"` matches only `p2`.
#[test]
fn test_add_prefix_multiple_patterns() {
    let core: CoreMatcher<String> = CoreMatcher::new();
    for (name, prefix) in [("p1", "re"), ("p2", "un")].iter() {
        core.add_pattern(
            name.to_string(),
            &[(
                "verb".to_string(),
                vec![Matcher::Prefix(prefix.to_string())],
            )],
        )
        .unwrap();
    }

    for (value, expected) in [("\"replace\"", "p1"), ("\"undo\"", "p2")].iter() {
        let event = vec![EventField {
            path: "verb".to_string(),
            value: value.to_string(),
            array_trail: vec![],
            is_number: false,
        }];
        assert_eq!(core.matches_for_fields(&event), vec![expected.to_string()]);
    }
}
/// Two suffix patterns on `ext` (`log` as `p1`, `txt` as `p2`): `"app.log"` matches only `p1`
/// and `"readme.txt"` matches only `p2`.
#[test]
fn test_add_suffix_multiple_patterns() {
    let core: CoreMatcher<String> = CoreMatcher::new();
    for (name, suffix) in [("p1", "log"), ("p2", "txt")].iter() {
        core.add_pattern(
            name.to_string(),
            &[(
                "ext".to_string(),
                vec![Matcher::Suffix(suffix.to_string())],
            )],
        )
        .unwrap();
    }

    for (value, expected) in [("\"app.log\"", "p1"), ("\"readme.txt\"", "p2")].iter() {
        let event = vec![EventField {
            path: "ext".to_string(),
            value: value.to_string(),
            array_trail: vec![],
            is_number: false,
        }];
        assert_eq!(core.matches_for_fields(&event), vec![expected.to_string()]);
    }
}
/// A `Wildcard("h*o")` pattern given without quotes must match the quoted event value
/// `"hello"`.
#[test]
fn test_quote_wrap_via_wildcard() {
    let core: CoreMatcher<String> = CoreMatcher::new();
    let conditions = [(
        "name".to_string(),
        vec![Matcher::Wildcard("h*o".to_string())],
    )];
    core.add_pattern("p1".to_string(), &conditions).unwrap();

    let event = vec![EventField {
        path: "name".to_string(),
        value: "\"hello\"".to_string(),
        array_trail: vec![],
        is_number: false,
    }];
    let matched = core.matches_for_fields(&event);
    assert_eq!(matched, vec!["p1".to_string()]);
}
/// A shell-style pattern given without quotes (`*.log`) must match the quoted
/// event value `"app.log"`.
// NOTE(review): the name suggests this exercises `quote_wrap` on the
// shell-style path; the call chain is not visible from this test — confirm.
#[test]
fn test_quote_wrap_via_shellstyle() {
    let matcher: CoreMatcher<String> = CoreMatcher::new();

    let rule = [(
        "file".to_string(),
        vec![Matcher::Shellstyle("*.log".to_string())],
    )];
    matcher.add_pattern("p1".to_string(), &rule).unwrap();

    let event = vec![EventField {
        path: "file".to_string(),
        value: "\"app.log\"".to_string(),
        array_trail: vec![],
        is_number: false,
    }];
    let found = matcher.matches_for_fields(&event);
    assert_eq!(found, vec!["p1".to_string()]);
}
/// Two small exact-match patterns on different fields must both be accepted by
/// a matcher created with `CoreMatcher::new()`.
#[test]
fn test_check_budget_via_patterns() {
    let matcher = CoreMatcher::<String>::new();

    // (pattern id, field, quoted literal, failure message)
    let cases = [
        (
            "p1",
            "field",
            "\"value\"",
            "simple pattern should pass default budget check",
        ),
        (
            "p2",
            "other",
            "\"data\"",
            "second pattern should also pass budget",
        ),
    ];

    for (id, field, literal, why) in cases {
        let rule = [(field.to_string(), vec![Matcher::Exact(literal.to_string())])];
        let outcome = matcher.add_pattern(id.to_string(), &rule);
        assert!(outcome.is_ok(), "{why}");
    }
}
/// Adding two wildcard patterns on the same field must succeed, and afterwards
/// each pattern must still match only its own event value.
#[test]
fn test_merge_into_main_arena_via_patterns() {
    let matcher = CoreMatcher::<String>::new();

    // (pattern id, wildcard, failure message) — added in this order.
    let additions = [
        ("p1", "*ello", "first wildcard should succeed"),
        ("p2", "w*rld", "second wildcard should merge successfully"),
    ];
    for (id, glob, why) in additions {
        let rule = [("x".to_string(), vec![Matcher::Wildcard(glob.to_string())])];
        let outcome = matcher.add_pattern(id.to_string(), &rule);
        assert!(outcome.is_ok(), "{why}");
    }

    // (quoted event value, expected pattern id, failure message)
    let probes = [
        ("\"hello\"", "p1", "first pattern should match after merge"),
        ("\"world\"", "p2", "second pattern should match after merge"),
    ];
    for (value, expected_id, why) in probes {
        let event = vec![EventField {
            path: "x".to_string(),
            value: value.to_string(),
            array_trail: vec![],
            is_number: false,
        }];
        let found = matcher.matches_for_fields(&event);
        assert_eq!(found, vec![expected_id], "{why}");
    }
}
/// An exact-match pattern followed by a wildcard pattern on the same field
/// must both still match an event value that satisfies each of them.
///
/// The exact pattern (`p1`) is added first and the wildcard (`p2`) second, so
/// the value `"hello"` satisfies both. Checking only that the result is
/// non-empty would let the test pass when one of the two patterns was lost,
/// so both ids are asserted explicitly.
#[test]
fn test_singleton_to_arena_transition() {
    let cm = CoreMatcher::<String>::new();
    cm.add_pattern(
        "p1".to_string(),
        &[(
            "val".to_string(),
            vec![Matcher::Exact("\"hello\"".to_string())],
        )],
    )
    .unwrap();
    cm.add_pattern(
        "p2".to_string(),
        &[(
            "val".to_string(),
            vec![Matcher::Wildcard("h*o".to_string())],
        )],
    )
    .unwrap();

    let hello = vec![EventField {
        path: "val".to_string(),
        value: "\"hello\"".to_string(),
        array_trail: vec![],
        is_number: false,
    }];
    let matches = cm.matches_for_fields(&hello);
    assert!(
        !matches.is_empty(),
        "singleton-to-arena merge should preserve matches"
    );

    // Membership (not equality) is checked so the test does not depend on the
    // order in which pattern ids are reported.
    for expected in ["p1", "p2"] {
        assert!(
            matches.iter().any(|m| m.as_str() == expected),
            "pattern {expected} must still match after the merge, got {matches:?}"
        );
    }
}
/// A prefix pattern must be accepted by a matcher created with
/// `CoreMatcher::new()` and must then match an event value beginning with that
/// prefix.
#[test]
fn test_budget_enforcement_via_matching() {
    let matcher = CoreMatcher::<String>::new();

    let rule = [("f".to_string(), vec![Matcher::Prefix("test".to_string())])];
    let outcome = matcher.add_pattern("p1".to_string(), &rule);
    assert!(
        outcome.is_ok(),
        "prefix pattern should be built successfully"
    );

    let event = vec![EventField {
        path: "f".to_string(),
        value: "\"testing\"".to_string(),
        array_trail: vec![],
        is_number: false,
    }];
    let found = matcher.matches_for_fields(&event);
    assert_eq!(found, vec!["p1".to_string()]);
}
/// After a single string transition is added, the first
/// `take_singleton_as_arena` call must return `Some` and the next one `None`.
#[test]
fn test_take_singleton_idempotent() {
    let value_matcher: MutableValueMatcher<String> = MutableValueMatcher::new();

    // The outcome of the insertion is intentionally not inspected here; the
    // assertions below observe its effect.
    let _ = value_matcher.add_string_transition(b"test", 0);

    let taken_first = value_matcher.take_singleton_as_arena();
    assert!(taken_first.is_some(), "first call should return Some");

    let taken_again = value_matcher.take_singleton_as_arena();
    assert!(
        taken_again.is_none(),
        "second call should return None (singleton consumed)"
    );
}
/// Five distinct exact-match patterns on one field must all be accepted by a
/// matcher created with `CoreMatcher::new()`, and each must match only its own
/// value afterwards.
#[test]
fn test_check_budget_boundary_condition() {
    const PATTERN_COUNT: usize = 5;

    let matcher = CoreMatcher::<String>::new();
    // The quoted literal used both as the pattern and as the event value.
    let quoted = |idx: usize| format!("\"pattern{idx}\"");

    // Phase 1: register every pattern before probing any of them.
    for idx in 0..PATTERN_COUNT {
        let rule = [("f".to_string(), vec![Matcher::Exact(quoted(idx))])];
        let outcome = matcher.add_pattern(format!("p{idx}"), &rule);
        assert!(
            outcome.is_ok(),
            "pattern {idx} should succeed with default budget"
        );
    }

    // Phase 2: each value must be reported for exactly its own pattern id.
    for idx in 0..PATTERN_COUNT {
        let event = vec![EventField {
            path: "f".to_string(),
            value: quoted(idx),
            array_trail: vec![],
            is_number: false,
        }];
        let found = matcher.matches_for_fields(&event);
        assert_eq!(found, vec![format!("p{idx}")], "pattern {idx} should match");
    }
}
/// Three prefix patterns on the same field must all be accepted, and each must
/// afterwards match only event values starting with its own prefix.
#[test]
fn test_merge_into_main_arena_boundary() {
    let matcher = CoreMatcher::<String>::new();
    let ids: Vec<String> = (0..3).map(|n| format!("p{n}")).collect();

    // Register `prefix0`, `prefix1`, `prefix2` under ids p0, p1, p2.
    for (n, id) in ids.iter().enumerate() {
        let rule = [("x".to_string(), vec![Matcher::Prefix(format!("prefix{n}"))])];
        let outcome = matcher.add_pattern(id.clone(), &rule);
        assert!(outcome.is_ok(), "pattern {n} merge should succeed");
    }

    // Probe with a value that extends each prefix.
    for (n, id) in ids.iter().enumerate() {
        let event = vec![EventField {
            path: "x".to_string(),
            value: format!("\"prefix{n}_test\""),
            array_trail: vec![],
            is_number: false,
        }];
        let found = matcher.matches_for_fields(&event);
        assert_eq!(found, vec![id.clone()], "merged pattern {n} should match");
    }
}
/// Combining a single-branch lookbehind with a two-branch primary pattern must
/// produce two branches, i.e. neither primary alternative is dropped.
#[test]
fn test_lookbehind_combined_keeps_primary_alternation() {
    let lookbehind = parse_regexp("a").unwrap();
    let alternation = parse_regexp("x|y").unwrap();

    // Precondition: the alternation really parsed into two branches.
    let primary_branches = alternation.len();
    assert_eq!(primary_branches, 2);

    let merged = build_lookbehind_combined_pattern(&lookbehind, &alternation);
    let merged_branches = merged.len();
    assert_eq!(merged_branches, 2, "both primary alternatives must survive");
}
/// When two exact matchers and one prefix matcher are added in a single call,
/// a value that only the prefix matcher accepts must still produce a
/// transition.
#[test]
fn test_add_transition_mixed_with_prefix_keeps_prefix() {
    let mixed = vec![
        Matcher::Exact("\"aa\"".to_string()),
        Matcher::Exact("\"bb\"".to_string()),
        Matcher::Prefix("cc".to_string()),
    ];
    let field_matcher: MutableFieldMatcher<String> = MutableFieldMatcher::new();
    field_matcher.add_transition("x", &mixed, 0).unwrap();

    // "ccZ" equals neither exact literal, so only the prefix can accept it.
    let probe = qv(b"ccZ");
    let mut scratch = NfaBuffers::new();
    let hits = field_matcher.transition_on("x", &probe, false, &mut scratch);
    assert!(!hits.is_empty(), "Prefix matcher must not be dropped");
}
/// An exact-match transition added before a lookahead regexp must still be
/// returned by `transition_on` once the value matcher also holds
/// multi-condition NFAs.
#[test]
fn test_singleton_coexisting_with_multicondition() {
    let value_matcher: MutableValueMatcher<String> = MutableValueMatcher::new();

    // Step 1: the exact matcher; keep its target so it can be identified later.
    let exact = Matcher::Exact("\"hello\"".to_string());
    let exact_target = value_matcher.add_transition(&exact, 0).unwrap();

    // Step 2: obtain a lookahead matcher by parsing a JSON pattern and taking
    // the first matcher of the first field.
    let limits = crate::PatternLimits::default();
    let parsed =
        crate::json::parse_pattern(r#"{"f": [{"regexp": "(?=h)hello"}]}"#, &limits).unwrap();
    let mut matcher_lists = parsed.into_values();
    let first_list = matcher_lists.next().unwrap();
    let lookahead = first_list.into_iter().next().unwrap();
    value_matcher.add_transition(&lookahead, 0).unwrap();

    // Precondition: the second matcher was stored as a multi-condition NFA.
    let has_multi_condition = !value_matcher.multi_condition_nfas.borrow().is_empty();
    assert!(has_multi_condition);

    let probe = qv(b"hello");
    let mut scratch = NfaBuffers::new();
    let hits = value_matcher.transition_on(&probe, false, &mut scratch);
    let exact_emitted = hits.iter().any(|hit| Rc::ptr_eq(hit, &exact_target));
    assert!(
        exact_emitted,
        "singleton transition must be emitted alongside multi-condition NFAs"
    );
}
/// Regexp patterns containing a positive lookahead (`foo(?=bar)bar`) and a
/// negative lookahead (`foo(?!bar)baz`) must match `"foobar"` and `"foobaz"`
/// respectively when registered through `CoreMatcher`.
#[test]
fn test_core_matcher_lookaround_conditions() {
    let matcher: CoreMatcher<String> = CoreMatcher::new();

    // Parses a JSON pattern with default limits and registers it under `id`.
    let register = |id: &str, json: &str| {
        let parsed = crate::json::parse_pattern(json, &crate::PatternLimits::default()).unwrap();
        let fields = parsed.into_iter().collect::<Vec<_>>();
        matcher.add_pattern(id.to_string(), &fields).unwrap();
    };
    // Runs the matcher against a single non-numeric field.
    let probe = |path: &str, value: &str| {
        matcher.matches_for_fields(&[EventField {
            path: path.to_string(),
            value: value.to_string(),
            array_trail: vec![],
            is_number: false,
        }])
    };

    register("pos", r#"{"v": [{"regexp": "foo(?=bar)bar"}]}"#);
    register("neg", r#"{"w": [{"regexp": "foo(?!bar)baz"}]}"#);

    let positive_hits = probe("v", "\"foobar\"");
    assert_eq!(
        positive_hits,
        vec!["pos".to_string()],
        "positive lookahead must match"
    );

    let negative_hits = probe("w", "\"foobaz\"");
    assert_eq!(
        negative_hits,
        vec!["neg".to_string()],
        "negative lookahead must match"
    );
}
/// Adding a second regexp pattern must succeed when `arena_byte_budget` equals
/// `MERGED_SIZE` and must fail when the budget is one byte smaller.
#[test]
fn test_merge_into_main_arena_budget_boundary() {
    // NOTE(review): presumably the byte size of the arena after both regexps
    // are merged; the value is hard-coded, so re-check it if the arena layout
    // changes.
    const MERGED_SIZE: usize = 2360;

    let limits = crate::PatternLimits::default();
    let first: Vec<_> = crate::json::parse_pattern(r#"{"x": [{"regexp": "aaaa"}]}"#, &limits)
        .unwrap()
        .into_iter()
        .collect();
    let second: Vec<_> = crate::json::parse_pattern(r#"{"x": [{"regexp": "bbbb"}]}"#, &limits)
        .unwrap()
        .into_iter()
        .collect();

    // Fresh matcher with an explicit byte budget.
    let with_budget = |budget: usize| CoreMatcher::<String> {
        root: Rc::new(MutableFieldMatcher::new()),
        arena_byte_budget: budget,
    };

    // Budget exactly equal to the merged size: both additions are accepted.
    let exact_fit = with_budget(MERGED_SIZE);
    exact_fit.add_pattern("a".to_string(), &first).unwrap();
    exact_fit
        .add_pattern("b".to_string(), &second)
        .expect("merge at budget == merged size must succeed");

    // One byte short: the first addition still fits, the second is rejected.
    let one_short = with_budget(MERGED_SIZE - 1);
    one_short.add_pattern("a".to_string(), &first).unwrap();
    let rejected = one_short.add_pattern("b".to_string(), &second).is_err();
    assert!(
        rejected,
        "merge exceeding budget by one byte must be rejected"
    );
}
}