use std::sync::Arc;
use rustc_hash::FxHashSet;
use smallvec::{smallvec, SmallVec};
use super::small_table::{AccelInfo, FieldMatcher, BYTE_CEILING};
use super::sparse_set::SparseSet;
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct StateId(u32);
impl StateId {
pub const NONE: Self = Self(u32::MAX);
#[inline]
pub fn from_index(index: usize) -> Self {
Self(index as u32)
}
#[inline]
pub fn is_none(self) -> bool {
self.0 == u32::MAX
}
#[inline]
pub fn index(self) -> usize {
self.0 as usize
}
}
#[derive(Clone, Default)]
pub struct ArenaFaState {
pub table: ArenaSmallTable,
pub field_transitions: SmallVec<[Arc<FieldMatcher>; 1]>,
pub epsilon_closure: SmallVec<[StateId; 4]>,
}
impl std::fmt::Debug for ArenaFaState {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("ArenaFaState")
.field("table", &self.table)
.field("field_transitions_count", &self.field_transitions.len())
.finish()
}
}
impl ArenaFaState {
pub fn new() -> Self {
Self::default()
}
pub fn with_table(table: ArenaSmallTable) -> Self {
Self {
table,
field_transitions: SmallVec::new(),
epsilon_closure: SmallVec::new(),
}
}
}
#[derive(Clone, Debug)]
pub struct ArenaSmallTable {
pub ceilings: SmallVec<[u8; 8]>,
pub steps: SmallVec<[StateId; 8]>,
pub epsilons: SmallVec<[StateId; 2]>,
pub spinout: StateId,
pub accel: Option<AccelInfo>,
pub default: StateId,
}
impl Default for ArenaSmallTable {
fn default() -> Self {
Self::new()
}
}
impl ArenaSmallTable {
pub fn new() -> Self {
Self {
ceilings: smallvec![BYTE_CEILING as u8],
steps: smallvec![StateId::NONE],
epsilons: SmallVec::new(),
spinout: StateId::NONE,
accel: None,
default: StateId::NONE,
}
}
pub fn with_mappings(default: StateId, bytes: &[u8], targets: &[StateId]) -> Self {
let mut unpacked = [StateId::NONE; BYTE_CEILING];
if !default.is_none() {
for slot in unpacked.iter_mut() {
*slot = default;
}
}
for (b, t) in bytes.iter().zip(targets.iter()) {
unpacked[*b as usize] = *t;
}
let mut table = Self::new();
table.default = default;
table.pack(&unpacked);
table
}
pub fn pack(&mut self, unpacked: &[StateId; BYTE_CEILING]) {
self.ceilings.clear();
self.steps.clear();
let mut current = unpacked[0];
for (i, &state_id) in unpacked.iter().enumerate() {
if state_id != current {
self.ceilings.push(i as u8);
self.steps.push(current);
current = state_id;
}
}
self.ceilings.push(BYTE_CEILING as u8);
self.steps.push(current);
}
pub fn set_transition(&mut self, byte: u8, target: StateId) {
let mut unpacked = [StateId::NONE; BYTE_CEILING];
unpack_arena_table(self, &mut unpacked);
unpacked[byte as usize] = target;
self.pack(&unpacked);
}
#[inline]
pub fn dstep(&self, byte: u8) -> StateId {
for (i, &ceiling) in self.ceilings.iter().enumerate() {
if byte < ceiling {
return self.steps[i];
}
}
StateId::NONE
}
#[inline]
pub fn step(&self, byte: u8) -> (StateId, &[StateId]) {
(self.dstep(byte), &self.epsilons)
}
pub fn sparse_transitions(&self) -> impl Iterator<Item = (u8, StateId)> + '_ {
let mut result = Vec::new();
let mut prev_ceiling: u8 = 0;
for (i, &ceiling) in self.ceilings.iter().enumerate() {
let state = self.steps[i];
if state != self.default {
for byte in prev_ceiling..ceiling {
result.push((byte, state));
}
}
prev_ceiling = ceiling;
}
result.into_iter()
}
}
#[derive(Clone, Default)]
pub struct StateArena {
states: Vec<ArenaFaState>,
}
impl std::fmt::Debug for StateArena {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("StateArena")
.field("states_count", &self.states.len())
.finish()
}
}
impl StateArena {
pub fn new() -> Self {
Self { states: Vec::new() }
}
pub fn with_capacity(capacity: usize) -> Self {
Self {
states: Vec::with_capacity(capacity),
}
}
pub fn estimated_byte_size(&self) -> usize {
self.states.capacity() * std::mem::size_of::<ArenaFaState>()
}
pub fn alloc(&mut self) -> StateId {
let id = StateId(self.states.len() as u32);
self.states.push(ArenaFaState::default());
id
}
pub fn alloc_with_table(&mut self, table: ArenaSmallTable) -> StateId {
let id = StateId(self.states.len() as u32);
self.states.push(ArenaFaState::with_table(table));
id
}
#[inline]
pub fn get(&self, id: StateId) -> Option<&ArenaFaState> {
if id.is_none() {
None
} else {
self.states.get(id.index())
}
}
#[inline]
pub fn get_mut(&mut self, id: StateId) -> Option<&mut ArenaFaState> {
if id.is_none() {
None
} else {
self.states.get_mut(id.index())
}
}
pub fn len(&self) -> usize {
self.states.len()
}
pub fn is_empty(&self) -> bool {
self.states.is_empty()
}
pub fn is_nondeterministic(&self) -> bool {
self.states
.iter()
.any(|state| !state.table.epsilons.is_empty())
}
pub fn precompute_epsilon_closures(&mut self) {
let arena_len = self.states.len();
if arena_len == 0 {
return;
}
let mut seen = SparseSet::new(arena_len);
let mut stack: Vec<StateId> = Vec::new();
let mut closures: Vec<SmallVec<[StateId; 4]>> = Vec::with_capacity(arena_len);
for state_idx in 0..arena_len {
let state_id = StateId::from_index(state_idx);
if self.states[state_idx].table.epsilons.is_empty() {
closures.push(smallvec![state_id]);
} else {
seen.clear();
stack.clear();
let mut closure: SmallVec<[StateId; 4]> = SmallVec::new();
closure.push(state_id);
stack.push(state_id);
seen.insert(state_idx);
while let Some(current_id) = stack.pop() {
if current_id.is_none() {
continue;
}
for &eps_id in &self.states[current_id.index()].table.epsilons {
if !eps_id.is_none() {
let idx = eps_id.index();
if idx < seen.capacity() && seen.insert(idx) {
closure.push(eps_id);
stack.push(eps_id);
}
}
}
}
closures.push(closure);
}
}
for (state_idx, closure) in closures.into_iter().enumerate() {
self.states[state_idx].epsilon_closure = closure;
}
}
}
impl std::ops::Index<StateId> for StateArena {
type Output = ArenaFaState;
#[inline]
fn index(&self, id: StateId) -> &Self::Output {
&self.states[id.index()]
}
}
impl std::ops::IndexMut<StateId> for StateArena {
#[inline]
fn index_mut(&mut self, id: StateId) -> &mut Self::Output {
&mut self.states[id.index()]
}
}
#[derive(Default)]
pub struct ArenaNfaBuffers {
pub current_states: Vec<StateId>,
pub next_states: Vec<StateId>,
pub transitions: Vec<Arc<FieldMatcher>>,
seen_transitions: FxHashSet<usize>,
}
impl ArenaNfaBuffers {
pub fn new() -> Self {
Self::default()
}
pub fn with_capacity() -> Self {
Self {
current_states: Vec::with_capacity(16),
next_states: Vec::with_capacity(16),
transitions: Vec::new(),
seen_transitions: FxHashSet::default(),
}
}
pub fn clear(&mut self) {
self.current_states.clear();
self.next_states.clear();
self.transitions.clear();
self.seen_transitions.clear();
}
}
pub const ARENA_VALUE_TERMINATOR: u8 = 0xF5;
#[inline]
fn try_accelerate_arena(table: &ArenaSmallTable, remaining: &[u8]) -> Option<usize> {
let accel = table.accel.as_ref()?;
match accel.len {
1 => memchr::memchr(accel.exit_bytes[0], remaining),
2 => memchr::memchr2(accel.exit_bytes[0], accel.exit_bytes[1], remaining),
3 => memchr::memchr3(
accel.exit_bytes[0],
accel.exit_bytes[1],
accel.exit_bytes[2],
remaining,
),
_ => None,
}
}
#[inline]
pub fn traverse_arena_nfa(
arena: &StateArena,
start: StateId,
val: &[u8],
bufs: &mut ArenaNfaBuffers,
) {
bufs.clear();
if start.is_none() {
return;
}
bufs.current_states.push(start);
let len = val.len();
let mut i = 0;
while i <= len {
if bufs.current_states.is_empty() {
break;
}
if i < len && bufs.current_states.len() == 1 {
let state_id = bufs.current_states[0];
let state = &arena[state_id];
if let Some(skip) = try_accelerate_arena(&state.table, &val[i..]) {
if skip > 0 {
i += skip;
continue;
}
}
}
let byte = if i < len {
val[i]
} else {
ARENA_VALUE_TERMINATOR
};
let ArenaNfaBuffers {
ref mut current_states,
ref mut next_states,
ref mut transitions,
ref mut seen_transitions,
} = *bufs;
for &state_id in current_states.iter() {
for &ec_state_id in &arena[state_id].epsilon_closure {
let ec_state = &arena[ec_state_id];
for ft in &ec_state.field_transitions {
let ptr = Arc::as_ptr(ft) as usize;
if seen_transitions.insert(ptr) {
transitions.push(ft.clone());
}
}
if !ec_state.table.spinout.is_none() && byte != ARENA_VALUE_TERMINATOR {
next_states.push(ec_state_id);
}
let next = ec_state.table.dstep(byte);
if !next.is_none() {
next_states.push(next);
}
}
}
current_states.clear();
std::mem::swap(current_states, next_states);
i += 1;
}
let ArenaNfaBuffers {
ref current_states,
ref mut transitions,
ref mut seen_transitions,
..
} = *bufs;
for &state_id in current_states.iter() {
for &ec_state_id in &arena[state_id].epsilon_closure {
for ft in &arena[ec_state_id].field_transitions {
let ptr = Arc::as_ptr(ft) as usize;
if seen_transitions.insert(ptr) {
transitions.push(ft.clone());
}
}
}
}
}
#[inline]
pub fn traverse_arena_dfa(
arena: &StateArena,
start: StateId,
val: &[u8],
transitions: &mut Vec<Arc<FieldMatcher>>,
) {
if start.is_none() {
return;
}
let mut current = start;
for i in 0..=val.len() {
let state = &arena[current];
transitions.extend(arena[current].field_transitions.iter().cloned());
let byte = if i < val.len() {
val[i]
} else {
ARENA_VALUE_TERMINATOR
};
let next = state.table.dstep(byte);
if next.is_none() {
return;
}
current = next;
}
transitions.extend(arena[current].field_transitions.iter().cloned());
}
#[inline]
pub fn traverse_arena_dfa_backward(
arena: &StateArena,
start: StateId,
val: &[u8],
transitions: &mut Vec<Arc<FieldMatcher>>,
) {
if start.is_none() || val.is_empty() {
return;
}
let mut current = start;
for i in (0..val.len()).rev() {
let next = arena[current].table.dstep(val[i]);
if next.is_none() {
return;
}
current = next;
if !arena[current].field_transitions.is_empty() {
transitions.extend(arena[current].field_transitions.iter().cloned());
}
}
}
pub fn merge_arena_dfas(
arena1: &StateArena,
start1: StateId,
arena2: &StateArena,
start2: StateId,
) -> (StateArena, StateId) {
use std::collections::HashMap;
if start1.is_none() && start2.is_none() {
return (StateArena::new(), StateId::NONE);
}
if start1.is_none() {
return clone_arena_subset(arena2, start2);
}
if start2.is_none() {
return clone_arena_subset(arena1, start1);
}
type MemoKey = (i32, i32);
let mut memo: HashMap<MemoKey, StateId> = HashMap::new();
let mut new_arena = StateArena::new();
let start =
merge_arena_states_recursive(arena1, start1, arena2, start2, &mut new_arena, &mut memo);
new_arena.precompute_epsilon_closures();
(new_arena, start)
}
fn clone_arena_subset(arena: &StateArena, start: StateId) -> (StateArena, StateId) {
use std::collections::HashMap;
if start.is_none() {
return (StateArena::new(), StateId::NONE);
}
let mut new_arena = StateArena::new();
let mut id_map: HashMap<u32, StateId> = HashMap::new();
clone_state_recursive(arena, start, &mut new_arena, &mut id_map);
let new_start = id_map.get(&start.0).copied().unwrap_or(StateId::NONE);
new_arena.precompute_epsilon_closures();
(new_arena, new_start)
}
fn clone_state_recursive(
arena: &StateArena,
state_id: StateId,
new_arena: &mut StateArena,
id_map: &mut std::collections::HashMap<u32, StateId>,
) -> StateId {
if state_id.is_none() {
return StateId::NONE;
}
if let Some(&new_id) = id_map.get(&state_id.0) {
return new_id;
}
let new_id = new_arena.alloc();
id_map.insert(state_id.0, new_id);
new_arena[new_id].field_transitions = arena[state_id].field_transitions.clone();
let old_state = &arena[state_id];
let old_table = &old_state.table;
let mut new_table = ArenaSmallTable {
ceilings: old_table.ceilings.clone(),
steps: SmallVec::with_capacity(old_table.steps.len()),
epsilons: SmallVec::with_capacity(old_table.epsilons.len()),
spinout: StateId::NONE,
accel: old_table.accel.clone(),
default: StateId::NONE,
};
for &step_id in &old_table.steps {
let new_step = clone_state_recursive(arena, step_id, new_arena, id_map);
new_table.steps.push(new_step);
}
for &eps_id in &old_table.epsilons {
let new_eps = clone_state_recursive(arena, eps_id, new_arena, id_map);
new_table.epsilons.push(new_eps);
}
if !old_table.spinout.is_none() {
new_table.spinout = clone_state_recursive(arena, old_table.spinout, new_arena, id_map);
}
if !old_table.default.is_none() {
new_table.default = clone_state_recursive(arena, old_table.default, new_arena, id_map);
}
new_arena[new_id].table = new_table;
new_id
}
fn merge_arena_states_recursive(
arena1: &StateArena,
state1: StateId,
arena2: &StateArena,
state2: StateId,
new_arena: &mut StateArena,
memo: &mut std::collections::HashMap<(i32, i32), StateId>,
) -> StateId {
let key1 = if state1.is_none() {
-1
} else {
state1.0 as i32
};
let key2 = if state2.is_none() {
-1
} else {
state2.0 as i32
};
let key = (key1, key2);
if let Some(&cached) = memo.get(&key) {
return cached;
}
if state1.is_none() && state2.is_none() {
return StateId::NONE;
}
let new_id = new_arena.alloc();
memo.insert(key, new_id);
if state1.is_none() {
let s2 = &arena2[state2];
new_arena[new_id].field_transitions = s2.field_transitions.clone();
new_arena[new_id].table =
remap_table_recursive(arena2, &s2.table, arena1, new_arena, memo, false);
return new_id;
}
if state2.is_none() {
let s1 = &arena1[state1];
new_arena[new_id].field_transitions = s1.field_transitions.clone();
new_arena[new_id].table =
remap_table_recursive(arena1, &s1.table, arena2, new_arena, memo, true);
return new_id;
}
let s1 = &arena1[state1];
let s2 = &arena2[state2];
let mut field_transitions = s1.field_transitions.clone();
field_transitions.extend(s2.field_transitions.iter().cloned());
new_arena[new_id].field_transitions = field_transitions;
new_arena[new_id].table =
merge_arena_tables(arena1, &s1.table, arena2, &s2.table, new_arena, memo);
new_id
}
fn remap_table_recursive(
source_arena: &StateArena,
table: &ArenaSmallTable,
_other_arena: &StateArena,
new_arena: &mut StateArena,
memo: &mut std::collections::HashMap<(i32, i32), StateId>,
is_arena1: bool,
) -> ArenaSmallTable {
let mut new_table = ArenaSmallTable {
ceilings: table.ceilings.clone(),
steps: SmallVec::with_capacity(table.steps.len()),
epsilons: SmallVec::with_capacity(table.epsilons.len()),
spinout: StateId::NONE,
accel: table.accel.clone(),
default: StateId::NONE,
};
for &step_id in &table.steps {
if step_id.is_none() {
new_table.steps.push(StateId::NONE);
} else {
let merged = if is_arena1 {
merge_arena_states_recursive(
source_arena,
step_id,
_other_arena,
StateId::NONE,
new_arena,
memo,
)
} else {
merge_arena_states_recursive(
_other_arena,
StateId::NONE,
source_arena,
step_id,
new_arena,
memo,
)
};
new_table.steps.push(merged);
}
}
if !table.default.is_none() {
let merged = if is_arena1 {
merge_arena_states_recursive(
source_arena,
table.default,
_other_arena,
StateId::NONE,
new_arena,
memo,
)
} else {
merge_arena_states_recursive(
_other_arena,
StateId::NONE,
source_arena,
table.default,
new_arena,
memo,
)
};
new_table.default = merged;
}
for &eps_id in &table.epsilons {
if eps_id.is_none() {
new_table.epsilons.push(StateId::NONE);
} else {
let merged = if is_arena1 {
merge_arena_states_recursive(
source_arena,
eps_id,
_other_arena,
StateId::NONE,
new_arena,
memo,
)
} else {
merge_arena_states_recursive(
_other_arena,
StateId::NONE,
source_arena,
eps_id,
new_arena,
memo,
)
};
new_table.epsilons.push(merged);
}
}
if !table.spinout.is_none() {
let merged = if is_arena1 {
merge_arena_states_recursive(
source_arena,
table.spinout,
_other_arena,
StateId::NONE,
new_arena,
memo,
)
} else {
merge_arena_states_recursive(
_other_arena,
StateId::NONE,
source_arena,
table.spinout,
new_arena,
memo,
)
};
new_table.spinout = merged;
}
new_table
}
fn merge_arena_tables(
arena1: &StateArena,
table1: &ArenaSmallTable,
arena2: &StateArena,
table2: &ArenaSmallTable,
new_arena: &mut StateArena,
memo: &mut std::collections::HashMap<(i32, i32), StateId>,
) -> ArenaSmallTable {
let mut unpacked1 = [StateId::NONE; BYTE_CEILING];
let mut unpacked2 = [StateId::NONE; BYTE_CEILING];
unpack_arena_table(table1, &mut unpacked1);
unpack_arena_table(table2, &mut unpacked2);
let mut merged_unpacked = [StateId::NONE; BYTE_CEILING];
for i in 0..BYTE_CEILING {
let s1 = unpacked1[i];
let s2 = unpacked2[i];
merged_unpacked[i] = merge_arena_states_recursive(arena1, s1, arena2, s2, new_arena, memo);
}
let mut result = ArenaSmallTable::new();
result.pack(&merged_unpacked);
for &eps1 in &table1.epsilons {
let merged =
merge_arena_states_recursive(arena1, eps1, arena2, StateId::NONE, new_arena, memo);
if !merged.is_none() {
result.epsilons.push(merged);
}
}
for &eps2 in &table2.epsilons {
let merged =
merge_arena_states_recursive(arena1, StateId::NONE, arena2, eps2, new_arena, memo);
if !merged.is_none() {
result.epsilons.push(merged);
}
}
if !table1.spinout.is_none() || !table2.spinout.is_none() {
result.spinout = merge_arena_states_recursive(
arena1,
table1.spinout,
arena2,
table2.spinout,
new_arena,
memo,
);
}
result
}
fn unpack_arena_table(table: &ArenaSmallTable, unpacked: &mut [StateId; BYTE_CEILING]) {
let mut byte_idx = 0usize;
for (i, &ceiling) in table.ceilings.iter().enumerate() {
let ceiling = ceiling as usize;
while byte_idx < ceiling && byte_idx < BYTE_CEILING {
unpacked[byte_idx] = table.steps[i];
byte_idx += 1;
}
}
}
pub fn merge_arena_nfas(
arena1: &StateArena,
start1: StateId,
arena2: &StateArena,
start2: StateId,
) -> (StateArena, StateId) {
use std::collections::HashMap;
if start1.is_none() && start2.is_none() {
return (StateArena::new(), StateId::NONE);
}
if start1.is_none() {
return clone_arena_subset(arena2, start2);
}
if start2.is_none() {
return clone_arena_subset(arena1, start1);
}
type MemoKey = (i32, i32);
let mut memo: HashMap<MemoKey, StateId> = HashMap::new();
let mut new_arena = StateArena::new();
let start =
merge_arena_nfa_states_recursive(arena1, start1, arena2, start2, &mut new_arena, &mut memo);
new_arena.precompute_epsilon_closures();
(new_arena, start)
}
fn is_epsilon_only_state(arena: &StateArena, state_id: StateId) -> bool {
if state_id.is_none() {
return false;
}
let state = &arena[state_id];
!state.table.epsilons.is_empty()
&& state.table.ceilings.len() == 1
&& state.table.spinout.is_none()
&& state.field_transitions.is_empty()
}
fn flatten_epsilon_targets(arena: &StateArena, states: &[StateId]) -> SmallVec<[StateId; 2]> {
let mut targets = SmallVec::new();
for &state_id in states {
if !state_id.is_none() && is_epsilon_only_state(arena, state_id) {
for &eps_id in &arena[state_id].table.epsilons {
targets.push(eps_id);
}
} else {
targets.push(state_id);
}
}
targets
}
fn is_spinout_state(arena: &StateArena, state_id: StateId) -> bool {
if state_id.is_none() {
return false;
}
let state = &arena[state_id];
!state.table.spinout.is_none() && state.table.epsilons.len() == 1
}
fn merge_arena_nfa_states_recursive(
arena1: &StateArena,
state1: StateId,
arena2: &StateArena,
state2: StateId,
new_arena: &mut StateArena,
memo: &mut std::collections::HashMap<(i32, i32), StateId>,
) -> StateId {
let key1 = if state1.is_none() {
-1
} else {
state1.0 as i32
};
let key2 = if state2.is_none() {
-1
} else {
state2.0 as i32
};
let key = (key1, key2);
if let Some(&cached) = memo.get(&key) {
return cached;
}
if state1.is_none() && state2.is_none() {
return StateId::NONE;
}
let new_id = new_arena.alloc();
memo.insert(key, new_id);
if state1.is_none() {
let s2 = &arena2[state2];
new_arena[new_id].field_transitions = s2.field_transitions.clone();
new_arena[new_id].table =
remap_nfa_table_recursive(arena2, &s2.table, arena1, new_arena, memo, false);
return new_id;
}
if state2.is_none() {
let s1 = &arena1[state1];
new_arena[new_id].field_transitions = s1.field_transitions.clone();
new_arena[new_id].table =
remap_nfa_table_recursive(arena1, &s1.table, arena2, new_arena, memo, true);
return new_id;
}
let s1 = &arena1[state1];
let s2 = &arena2[state2];
let s1_has_spinout = is_spinout_state(arena1, state1);
let s2_has_spinout = is_spinout_state(arena2, state2);
let s1_has_epsilons = !s1.table.epsilons.is_empty();
let s2_has_epsilons = !s2.table.epsilons.is_empty();
if s1_has_spinout && s2_has_spinout {
let spinout1_eps = s1.table.epsilons[0];
let spinout2_eps = s2.table.epsilons[0];
let merged_eps = merge_arena_nfa_states_recursive(
arena1,
spinout1_eps,
arena2,
spinout2_eps,
new_arena,
memo,
);
let mut combined_table =
merge_nfa_tables_bytewise(arena1, &s1.table, arena2, &s2.table, new_arena, memo);
combined_table.spinout = new_id;
combined_table.epsilons = smallvec![merged_eps];
let mut field_transitions = s1.field_transitions.clone();
field_transitions.extend(s2.field_transitions.iter().cloned());
new_arena[new_id].table = combined_table;
new_arena[new_id].field_transitions = field_transitions;
return new_id;
}
if s1_has_epsilons || s2_has_epsilons {
let mut clone_map1: std::collections::HashMap<u32, StateId> =
std::collections::HashMap::new();
let mut clone_map2: std::collections::HashMap<u32, StateId> =
std::collections::HashMap::new();
let cloned1 = clone_state_into_arena(arena1, state1, new_arena, &mut clone_map1);
let cloned2 = clone_state_into_arena(arena2, state2, new_arena, &mut clone_map2);
let epsilons = flatten_epsilon_targets(new_arena, &[cloned1, cloned2]);
new_arena[new_id].table = ArenaSmallTable {
ceilings: smallvec![BYTE_CEILING as u8],
steps: smallvec![StateId::NONE],
epsilons,
spinout: StateId::NONE,
accel: None,
default: StateId::NONE,
};
return new_id;
}
let combined_table =
merge_nfa_tables_bytewise(arena1, &s1.table, arena2, &s2.table, new_arena, memo);
let mut field_transitions = s1.field_transitions.clone();
field_transitions.extend(s2.field_transitions.iter().cloned());
new_arena[new_id].table = combined_table;
new_arena[new_id].field_transitions = field_transitions;
new_id
}
fn clone_state_into_arena(
source_arena: &StateArena,
state_id: StateId,
target_arena: &mut StateArena,
id_map: &mut std::collections::HashMap<u32, StateId>,
) -> StateId {
if state_id.is_none() {
return StateId::NONE;
}
if let Some(&new_id) = id_map.get(&state_id.0) {
return new_id;
}
let new_id = target_arena.alloc();
id_map.insert(state_id.0, new_id);
target_arena[new_id].field_transitions = source_arena[state_id].field_transitions.clone();
let old_state = &source_arena[state_id];
let old_table = &old_state.table;
let mut new_table = ArenaSmallTable {
ceilings: old_table.ceilings.clone(),
steps: SmallVec::with_capacity(old_table.steps.len()),
epsilons: SmallVec::with_capacity(old_table.epsilons.len()),
spinout: StateId::NONE,
accel: old_table.accel.clone(),
default: StateId::NONE,
};
for &step_id in &old_table.steps {
let new_step = clone_state_into_arena(source_arena, step_id, target_arena, id_map);
new_table.steps.push(new_step);
}
for &eps_id in &old_table.epsilons {
let new_eps = clone_state_into_arena(source_arena, eps_id, target_arena, id_map);
new_table.epsilons.push(new_eps);
}
if !old_table.spinout.is_none() {
new_table.spinout =
clone_state_into_arena(source_arena, old_table.spinout, target_arena, id_map);
}
if !old_table.default.is_none() {
new_table.default =
clone_state_into_arena(source_arena, old_table.default, target_arena, id_map);
}
target_arena[new_id].table = new_table;
new_id
}
fn remap_nfa_table_recursive(
source_arena: &StateArena,
table: &ArenaSmallTable,
_other_arena: &StateArena,
new_arena: &mut StateArena,
memo: &mut std::collections::HashMap<(i32, i32), StateId>,
is_arena1: bool,
) -> ArenaSmallTable {
let mut new_table = ArenaSmallTable {
ceilings: table.ceilings.clone(),
steps: SmallVec::with_capacity(table.steps.len()),
epsilons: SmallVec::with_capacity(table.epsilons.len()),
spinout: StateId::NONE,
accel: table.accel.clone(),
default: StateId::NONE,
};
for &step_id in &table.steps {
if step_id.is_none() {
new_table.steps.push(StateId::NONE);
} else {
let merged = if is_arena1 {
merge_arena_nfa_states_recursive(
source_arena,
step_id,
_other_arena,
StateId::NONE,
new_arena,
memo,
)
} else {
merge_arena_nfa_states_recursive(
_other_arena,
StateId::NONE,
source_arena,
step_id,
new_arena,
memo,
)
};
new_table.steps.push(merged);
}
}
if !table.default.is_none() {
let merged = if is_arena1 {
merge_arena_nfa_states_recursive(
source_arena,
table.default,
_other_arena,
StateId::NONE,
new_arena,
memo,
)
} else {
merge_arena_nfa_states_recursive(
_other_arena,
StateId::NONE,
source_arena,
table.default,
new_arena,
memo,
)
};
new_table.default = merged;
}
for &eps_id in &table.epsilons {
if eps_id.is_none() {
new_table.epsilons.push(StateId::NONE);
} else {
let merged = if is_arena1 {
merge_arena_nfa_states_recursive(
source_arena,
eps_id,
_other_arena,
StateId::NONE,
new_arena,
memo,
)
} else {
merge_arena_nfa_states_recursive(
_other_arena,
StateId::NONE,
source_arena,
eps_id,
new_arena,
memo,
)
};
new_table.epsilons.push(merged);
}
}
if !table.spinout.is_none() {
let merged = if is_arena1 {
merge_arena_nfa_states_recursive(
source_arena,
table.spinout,
_other_arena,
StateId::NONE,
new_arena,
memo,
)
} else {
merge_arena_nfa_states_recursive(
_other_arena,
StateId::NONE,
source_arena,
table.spinout,
new_arena,
memo,
)
};
new_table.spinout = merged;
}
new_table
}
fn merge_nfa_tables_bytewise(
arena1: &StateArena,
table1: &ArenaSmallTable,
arena2: &StateArena,
table2: &ArenaSmallTable,
new_arena: &mut StateArena,
memo: &mut std::collections::HashMap<(i32, i32), StateId>,
) -> ArenaSmallTable {
let mut unpacked1 = [StateId::NONE; BYTE_CEILING];
let mut unpacked2 = [StateId::NONE; BYTE_CEILING];
unpack_arena_table(table1, &mut unpacked1);
unpack_arena_table(table2, &mut unpacked2);
let mut merged_unpacked = [StateId::NONE; BYTE_CEILING];
for i in 0..BYTE_CEILING {
let s1 = unpacked1[i];
let s2 = unpacked2[i];
merged_unpacked[i] =
merge_arena_nfa_states_recursive(arena1, s1, arena2, s2, new_arena, memo);
}
let mut result = ArenaSmallTable::new();
result.pack(&merged_unpacked);
for &eps1 in &table1.epsilons {
let merged =
merge_arena_nfa_states_recursive(arena1, eps1, arena2, StateId::NONE, new_arena, memo);
if !merged.is_none() {
result.epsilons.push(merged);
}
}
for &eps2 in &table2.epsilons {
let merged =
merge_arena_nfa_states_recursive(arena1, StateId::NONE, arena2, eps2, new_arena, memo);
if !merged.is_none() {
result.epsilons.push(merged);
}
}
if !table1.spinout.is_none() || !table2.spinout.is_none() {
result.spinout = merge_arena_nfa_states_recursive(
arena1,
table1.spinout,
arena2,
table2.spinout,
new_arena,
memo,
);
}
result
}
pub fn make_numeric_less_arena_fa(
bound: f64,
inclusive: bool,
next_field: Arc<FieldMatcher>,
) -> (StateArena, StateId) {
let bound_q = crate::numbits::q_num_from_f64(bound);
let mut arena = StateArena::new();
let match_state = arena.alloc();
arena[match_state].field_transitions.push(next_field);
let start = make_less_arena_fa_step(&bound_q, 0, inclusive, match_state, &mut arena);
arena.precompute_epsilon_closures();
(arena, start)
}
pub fn make_numeric_greater_arena_fa(
bound: f64,
inclusive: bool,
next_field: Arc<FieldMatcher>,
) -> (StateArena, StateId) {
let bound_q = crate::numbits::q_num_from_f64(bound);
let mut arena = StateArena::new();
let match_state = arena.alloc();
arena[match_state].field_transitions.push(next_field);
let start = make_greater_arena_fa_step(&bound_q, 0, inclusive, match_state, &mut arena);
arena.precompute_epsilon_closures();
(arena, start)
}
pub fn make_numeric_range_arena_fa(
lower: f64,
lower_incl: bool,
upper: f64,
upper_incl: bool,
next_field: Arc<FieldMatcher>,
) -> (StateArena, StateId) {
let lower_q = crate::numbits::q_num_from_f64(lower);
let upper_q = crate::numbits::q_num_from_f64(upper);
let mut arena = StateArena::new();
let match_state = arena.alloc();
arena[match_state].field_transitions.push(next_field);
let start = make_range_arena_fa_step(
&lower_q,
&upper_q,
0,
lower_incl,
upper_incl,
match_state,
&mut arena,
);
arena.precompute_epsilon_closures();
(arena, start)
}
fn make_less_arena_fa_step(
bound_q: &[u8],
index: usize,
inclusive: bool,
match_state: StateId,
arena: &mut StateArena,
) -> StateId {
if index >= bound_q.len() {
if inclusive {
let start = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[ARENA_VALUE_TERMINATOR],
&[match_state],
));
return start;
} else {
return arena.alloc();
}
}
let bound_byte = bound_q[index];
let continuation = make_less_arena_fa_step(bound_q, index + 1, inclusive, match_state, arena);
let mut unpacked = [StateId::NONE; BYTE_CEILING];
unpacked[ARENA_VALUE_TERMINATOR as usize] = match_state;
for b in 0..bound_byte {
if b != ARENA_VALUE_TERMINATOR {
unpacked[b as usize] = match_state;
}
}
unpacked[bound_byte as usize] = continuation;
let mut table = ArenaSmallTable::new();
table.pack(&unpacked);
arena.alloc_with_table(table)
}
fn make_greater_arena_fa_step(
bound_q: &[u8],
index: usize,
inclusive: bool,
match_state: StateId,
arena: &mut StateArena,
) -> StateId {
if index >= bound_q.len() {
let mut unpacked = [match_state; BYTE_CEILING];
if !inclusive {
unpacked[ARENA_VALUE_TERMINATOR as usize] = StateId::NONE;
}
let mut table = ArenaSmallTable::new();
table.pack(&unpacked);
return arena.alloc_with_table(table);
}
let bound_byte = bound_q[index];
let continuation =
make_greater_arena_fa_step(bound_q, index + 1, inclusive, match_state, arena);
let mut unpacked = [StateId::NONE; BYTE_CEILING];
unpacked[bound_byte as usize] = continuation;
for b in (bound_byte + 1)..(BYTE_CEILING as u8) {
if b != ARENA_VALUE_TERMINATOR {
unpacked[b as usize] = match_state;
}
}
let mut table = ArenaSmallTable::new();
table.pack(&unpacked);
arena.alloc_with_table(table)
}
fn make_range_arena_fa_step(
lower_q: &[u8],
upper_q: &[u8],
index: usize,
lower_incl: bool,
upper_incl: bool,
match_state: StateId,
arena: &mut StateArena,
) -> StateId {
let lower_done = index >= lower_q.len();
let upper_done = index >= upper_q.len();
if lower_done && upper_done {
if lower_incl && upper_incl {
return arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[ARENA_VALUE_TERMINATOR],
&[match_state],
));
} else {
return arena.alloc();
}
}
if lower_done {
return make_less_arena_fa_step(upper_q, index, upper_incl, match_state, arena);
}
if upper_done {
return arena.alloc();
}
let lower_byte = lower_q[index];
let upper_byte = upper_q[index];
if lower_byte == upper_byte {
let continuation = make_range_arena_fa_step(
lower_q,
upper_q,
index + 1,
lower_incl,
upper_incl,
match_state,
arena,
);
let mut unpacked = [StateId::NONE; BYTE_CEILING];
unpacked[lower_byte as usize] = continuation;
let mut table = ArenaSmallTable::new();
table.pack(&unpacked);
return arena.alloc_with_table(table);
}
let mut unpacked = [StateId::NONE; BYTE_CEILING];
let lower_continuation =
make_greater_arena_fa_step(lower_q, index + 1, lower_incl, match_state, arena);
unpacked[lower_byte as usize] = lower_continuation;
for b in (lower_byte + 1)..upper_byte {
unpacked[b as usize] = match_state;
}
let upper_continuation =
make_less_arena_fa_step(upper_q, index + 1, upper_incl, match_state, arena);
unpacked[upper_byte as usize] = upper_continuation;
let mut table = ArenaSmallTable::new();
table.pack(&unpacked);
arena.alloc_with_table(table)
}
pub fn make_string_arena_fa(val: &[u8], next_field: Arc<FieldMatcher>) -> (StateArena, StateId) {
let mut arena = StateArena::new();
let match_state = arena.alloc();
arena[match_state].field_transitions.push(next_field);
let start = make_string_arena_fa_step(val, 0, match_state, &mut arena);
arena.precompute_epsilon_closures();
(arena, start)
}
fn make_string_arena_fa_step(
val: &[u8],
index: usize,
match_state: StateId,
arena: &mut StateArena,
) -> StateId {
if index >= val.len() {
return arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[ARENA_VALUE_TERMINATOR],
&[match_state],
));
}
let continuation = make_string_arena_fa_step(val, index + 1, match_state, arena);
arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[val[index]],
&[continuation],
))
}
pub fn insert_string_into_arena(
arena: &mut StateArena,
start: StateId,
val: &[u8],
field_matcher: Arc<FieldMatcher>,
) {
let mut current = start;
for i in 0..=val.len() {
let byte = if i < val.len() {
val[i]
} else {
ARENA_VALUE_TERMINATOR
};
let next = arena[current].table.dstep(byte);
if !next.is_none() {
current = next;
} else {
let match_state = arena.alloc();
arena[match_state].field_transitions.push(field_matcher);
let mut target = match_state;
for j in (i + 1..=val.len()).rev() {
let b = if j < val.len() {
val[j]
} else {
ARENA_VALUE_TERMINATOR
};
target = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[b],
&[target],
));
}
arena[current].table.set_transition(byte, target);
return;
}
}
arena[current].field_transitions.push(field_matcher);
}
pub fn make_suffix_dfa(
reversed_bytes: &[u8],
next_field: Arc<FieldMatcher>,
) -> (StateArena, StateId) {
let mut arena = StateArena::new();
let match_state = arena.alloc();
arena[match_state].field_transitions.push(next_field);
let mut target = match_state;
for &byte in reversed_bytes.iter().rev() {
target = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[byte],
&[target],
));
}
arena.precompute_epsilon_closures();
(arena, target) }
pub fn insert_suffix_into_arena(
arena: &mut StateArena,
start: StateId,
reversed_bytes: &[u8],
field_matcher: Arc<FieldMatcher>,
) {
let mut current = start;
for (i, &byte) in reversed_bytes.iter().enumerate() {
let next = arena[current].table.dstep(byte);
if !next.is_none() {
current = next;
} else {
let match_state = arena.alloc();
arena[match_state].field_transitions.push(field_matcher);
let mut target = match_state;
for &b in reversed_bytes[i + 1..].iter().rev() {
target = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[b],
&[target],
));
}
arena[current].table.set_transition(byte, target);
return;
}
}
arena[current].field_transitions.push(field_matcher);
}
pub fn make_prefix_arena_fa(prefix: &[u8], next_field: Arc<FieldMatcher>) -> (StateArena, StateId) {
let mut arena = StateArena::new();
let match_state = arena.alloc();
arena[match_state].field_transitions.push(next_field);
let start = make_prefix_arena_fa_step(prefix, 0, match_state, &mut arena);
arena.precompute_epsilon_closures();
(arena, start)
}
fn make_prefix_arena_fa_step(
prefix: &[u8],
index: usize,
match_state: StateId,
arena: &mut StateArena,
) -> StateId {
if index >= prefix.len() {
return arena.alloc_with_table(ArenaSmallTable::with_mappings(
match_state, &[],
&[],
));
}
let continuation = make_prefix_arena_fa_step(prefix, index + 1, match_state, arena);
arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[prefix[index]],
&[continuation],
))
}
pub fn make_shellstyle_arena_fa(
pattern: &[u8],
next_field: Arc<FieldMatcher>,
) -> (StateArena, StateId) {
let mut arena = StateArena::new();
let match_state = arena.alloc();
arena[match_state].field_transitions.push(next_field);
let segments = parse_shellstyle_segments(pattern);
let start = build_shellstyle_arena_segments(&segments, 0, match_state, &mut arena);
arena.precompute_epsilon_closures();
(arena, start)
}
#[derive(Debug)]
enum ShellstyleSegment {
Literal(Vec<u8>),
Wildcard,
}
fn parse_shellstyle_segments(pattern: &[u8]) -> Vec<ShellstyleSegment> {
let mut segments = Vec::new();
let mut i = 0;
while i < pattern.len() {
if pattern[i] == b'*' {
segments.push(ShellstyleSegment::Wildcard);
i += 1;
} else {
let start = i;
while i < pattern.len() && pattern[i] != b'*' {
i += 1;
}
segments.push(ShellstyleSegment::Literal(pattern[start..i].to_vec()));
}
}
segments
}
fn build_shellstyle_arena_segments(
segments: &[ShellstyleSegment],
index: usize,
match_state: StateId,
arena: &mut StateArena,
) -> StateId {
if index >= segments.len() {
return arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[ARENA_VALUE_TERMINATOR],
&[match_state],
));
}
match &segments[index] {
ShellstyleSegment::Literal(bytes) => {
let continuation =
build_shellstyle_arena_segments(segments, index + 1, match_state, arena);
build_literal_arena_chain(bytes, continuation, arena)
}
ShellstyleSegment::Wildcard => {
let continuation =
build_shellstyle_arena_segments(segments, index + 1, match_state, arena);
build_wildcard_arena_spinout(continuation, arena)
}
}
}
fn build_literal_arena_chain(
bytes: &[u8],
continuation: StateId,
arena: &mut StateArena,
) -> StateId {
if bytes.is_empty() {
return continuation;
}
let mut current = continuation;
for &byte in bytes.iter().rev() {
current = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[byte],
&[current],
));
}
current
}
fn build_wildcard_arena_spinout(continuation: StateId, arena: &mut StateArena) -> StateId {
let spinout = arena.alloc();
arena[spinout].table.spinout = spinout;
arena[spinout].table.epsilons.push(continuation);
let start = arena.alloc();
arena[start].table.epsilons.push(spinout);
start
}
pub fn make_wildcard_arena_fa(
pattern: &[u8],
next_field: Arc<FieldMatcher>,
) -> (StateArena, StateId) {
let mut arena = StateArena::new();
let match_state = arena.alloc();
arena[match_state].field_transitions.push(next_field);
let segments = parse_wildcard_segments(pattern);
let start = build_shellstyle_arena_segments(&segments, 0, match_state, &mut arena);
arena.precompute_epsilon_closures();
(arena, start)
}
fn parse_wildcard_segments(pattern: &[u8]) -> Vec<ShellstyleSegment> {
let mut segments = Vec::new();
let mut i = 0;
while i < pattern.len() {
if pattern[i] == b'*' {
segments.push(ShellstyleSegment::Wildcard);
i += 1;
} else if pattern[i] == b'\\' && i + 1 < pattern.len() {
let escaped = pattern[i + 1];
if let Some(ShellstyleSegment::Literal(ref mut bytes)) = segments.last_mut() {
bytes.push(escaped);
} else {
segments.push(ShellstyleSegment::Literal(vec![escaped]));
}
i += 2;
} else {
if let Some(ShellstyleSegment::Literal(ref mut bytes)) = segments.last_mut() {
bytes.push(pattern[i]);
} else {
segments.push(ShellstyleSegment::Literal(vec![pattern[i]]));
}
i += 1;
}
}
segments
}
pub fn make_anything_but_arena_fa(
excluded: &[Vec<u8>],
next_field: Arc<FieldMatcher>,
) -> (StateArena, StateId) {
let mut arena = StateArena::new();
let success = arena.alloc();
arena[success].field_transitions.push(next_field);
let start = build_anything_but_step(excluded, 0, success, &mut arena);
arena.precompute_epsilon_closures();
(arena, start)
}
fn build_anything_but_step(
vals: &[Vec<u8>],
index: usize,
success: StateId,
arena: &mut StateArena,
) -> StateId {
use std::collections::{HashMap, HashSet};
let mut vals_with_bytes_remaining: HashMap<u8, Vec<&Vec<u8>>> = HashMap::new();
let mut vals_ending_here: HashSet<u8> = HashSet::new();
for val in vals {
let last_index = val.len().saturating_sub(1);
if index <= last_index && !val.is_empty() {
let utf8_byte = val[index];
if index < last_index {
vals_with_bytes_remaining
.entry(utf8_byte)
.or_default()
.push(val);
}
if index == last_index {
vals_ending_here.insert(utf8_byte);
}
}
}
let all_bytes: HashSet<u8> = vals_with_bytes_remaining
.keys()
.chain(vals_ending_here.iter())
.copied()
.collect();
let mut special_mappings: Vec<(u8, StateId)> = Vec::new();
for utf8_byte in all_bytes {
let has_continuation = vals_with_bytes_remaining.contains_key(&utf8_byte);
let ends_here = vals_ending_here.contains(&utf8_byte);
if has_continuation && ends_here {
let continuing_vals = vals_with_bytes_remaining.get(&utf8_byte).unwrap();
let owned_vals: Vec<Vec<u8>> = continuing_vals.iter().cloned().cloned().collect();
let continuation = build_anything_but_step(&owned_vals, index + 1, success, arena);
let fail_state = arena.alloc();
let mut combined_unpacked: [StateId; BYTE_CEILING] = [success; BYTE_CEILING];
for (byte, next) in arena[continuation].table.sparse_transitions() {
combined_unpacked[byte as usize] = next;
}
if !arena[continuation].table.default.is_none() {
for slot in combined_unpacked.iter_mut() {
if *slot == success {
*slot = arena[continuation].table.default;
}
}
}
combined_unpacked[ARENA_VALUE_TERMINATOR as usize] = fail_state;
let combined_state = arena.alloc();
arena[combined_state].table.pack(&combined_unpacked);
special_mappings.push((utf8_byte, combined_state));
} else if has_continuation {
let continuing_vals = vals_with_bytes_remaining.get(&utf8_byte).unwrap();
let owned_vals: Vec<Vec<u8>> = continuing_vals.iter().cloned().cloned().collect();
let next_state = build_anything_but_step(&owned_vals, index + 1, success, arena);
special_mappings.push((utf8_byte, next_state));
} else if ends_here {
let fail_state = arena.alloc(); let last_state = arena.alloc_with_table(ArenaSmallTable::with_mappings(
success, &[ARENA_VALUE_TERMINATOR],
&[fail_state], ));
special_mappings.push((utf8_byte, last_state));
}
}
if special_mappings.is_empty() {
return arena.alloc_with_table(ArenaSmallTable::with_mappings(success, &[], &[]));
}
let bytes: Vec<u8> = special_mappings.iter().map(|(b, _)| *b).collect();
let states: Vec<StateId> = special_mappings.iter().map(|(_, s)| *s).collect();
arena.alloc_with_table(ArenaSmallTable::with_mappings(success, &bytes, &states))
}
pub fn make_monocase_arena_fa(val: &[u8], next_field: Arc<FieldMatcher>) -> (StateArena, StateId) {
use crate::case_folding::case_fold_char;
let mut arena = StateArena::new();
let match_state = arena.alloc();
arena[match_state].field_transitions.push(next_field);
let start = if val.is_empty() {
arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[ARENA_VALUE_TERMINATOR],
&[match_state],
))
} else if let Ok(s) = std::str::from_utf8(val) {
let chars: Vec<(Vec<u8>, Option<Vec<u8>>)> = s
.char_indices()
.map(|(offset, ch)| {
let next_offset = s[offset..]
.chars()
.next()
.map(|c| offset + c.len_utf8())
.unwrap_or(val.len());
let orig = val[offset..next_offset].to_vec();
let alt = case_fold_char(ch).map(|alt_char| {
let mut buf = [0u8; 4];
alt_char.encode_utf8(&mut buf);
buf[..alt_char.len_utf8()].to_vec()
});
(orig, alt)
})
.collect();
build_monocase_arena_recursive(&chars, 0, match_state, &mut arena)
} else {
build_monocase_ascii_chain(val, match_state, &mut arena)
};
arena.precompute_epsilon_closures();
(arena, start)
}
fn build_monocase_ascii_chain(val: &[u8], match_state: StateId, arena: &mut StateArena) -> StateId {
let term_state = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[ARENA_VALUE_TERMINATOR],
&[match_state],
));
let mut current_next = term_state;
for i in (0..val.len()).rev() {
let byte = val[i];
let alt_byte = if byte.is_ascii_lowercase() {
Some(byte.to_ascii_uppercase())
} else if byte.is_ascii_uppercase() {
Some(byte.to_ascii_lowercase())
} else {
None
};
let state = if let Some(alt) = alt_byte {
if byte < alt {
arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[byte, alt],
&[current_next, current_next],
))
} else {
arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[alt, byte],
&[current_next, current_next],
))
}
} else {
arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[byte],
&[current_next],
))
};
current_next = state;
}
current_next
}
fn build_monocase_arena_recursive(
chars: &[(Vec<u8>, Option<Vec<u8>>)],
idx: usize,
match_state: StateId,
arena: &mut StateArena,
) -> StateId {
if idx >= chars.len() {
return arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[ARENA_VALUE_TERMINATOR],
&[match_state],
));
}
let (orig, alt) = &chars[idx];
let next_state = build_monocase_arena_recursive(chars, idx + 1, match_state, arena);
if let Some(alt_bytes) = alt {
let common_prefix = orig
.iter()
.zip(alt_bytes.iter())
.take_while(|(a, b)| a == b)
.count();
if common_prefix == 0 {
let orig_state = build_arena_fragment(orig, next_state, arena);
let alt_state = build_arena_fragment(alt_bytes, next_state, arena);
if orig[0] < alt_bytes[0] {
arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[orig[0], alt_bytes[0]],
&[orig_state, alt_state],
))
} else {
arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[alt_bytes[0], orig[0]],
&[alt_state, orig_state],
))
}
} else {
let orig_suffix = &orig[common_prefix..];
let alt_suffix = &alt_bytes[common_prefix..];
let diverge_state = if orig_suffix.is_empty() && alt_suffix.is_empty() {
next_state
} else if orig_suffix.is_empty() {
let alt_state = build_arena_fragment(alt_suffix, next_state, arena);
arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[alt_suffix[0]],
&[alt_state],
))
} else if alt_suffix.is_empty() {
let orig_state = build_arena_fragment(orig_suffix, next_state, arena);
arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[orig_suffix[0]],
&[orig_state],
))
} else {
let orig_state = build_arena_fragment(orig_suffix, next_state, arena);
let alt_state = build_arena_fragment(alt_suffix, next_state, arena);
if orig_suffix[0] < alt_suffix[0] {
arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[orig_suffix[0], alt_suffix[0]],
&[orig_state, alt_state],
))
} else {
arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[alt_suffix[0], orig_suffix[0]],
&[alt_state, orig_state],
))
}
};
let prefix = &orig[..common_prefix];
if prefix.is_empty() {
diverge_state
} else {
let mut current = diverge_state;
for &byte in prefix.iter().rev() {
current = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[byte],
&[current],
));
}
current
}
}
} else {
let mut current = next_state;
for &byte in orig.iter().rev() {
current = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[byte],
&[current],
));
}
current
}
}
fn build_arena_fragment(val: &[u8], end_at: StateId, arena: &mut StateArena) -> StateId {
if val.is_empty() || val.len() == 1 {
return end_at;
}
let mut current = end_at;
for &byte in val[1..].iter().rev() {
current = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[byte],
&[current],
));
}
current
}
pub fn make_cidr_arena_fa(
cidr: &crate::json::CidrPattern,
next_field: Arc<FieldMatcher>,
) -> (StateArena, StateId) {
use crate::json::CidrPattern;
let (mut arena, start) = match cidr {
CidrPattern::V4 {
network,
prefix_len,
} => make_ipv4_cidr_arena_fa(network, *prefix_len, next_field),
CidrPattern::V6 {
network,
prefix_len,
} => make_ipv6_cidr_arena_fa(network, *prefix_len, next_field),
};
arena.precompute_epsilon_closures();
(arena, start)
}
fn make_ipv4_cidr_arena_fa(
network: &[u8; 4],
prefix_len: u8,
next_field: Arc<FieldMatcher>,
) -> (StateArena, StateId) {
let mut arena = StateArena::new();
let match_state = arena.alloc();
arena[match_state].field_transitions.push(next_field);
let term_state = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[ARENA_VALUE_TERMINATOR],
&[match_state],
));
let close_quote_state = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
b"\"",
&[term_state],
));
let mut current_state = close_quote_state;
for octet_idx in (0..4).rev() {
let octet_start_bit = octet_idx * 8;
let octet_end_bit = octet_start_bit + 8;
let (min_val, max_val) = if prefix_len as usize >= octet_end_bit {
(network[octet_idx], network[octet_idx])
} else if (prefix_len as usize) <= octet_start_bit {
(0u8, 255u8)
} else {
let constrained_bits = prefix_len as usize - octet_start_bit;
let mask = !0u8 << (8 - constrained_bits);
let base = network[octet_idx] & mask;
let range_size = 1u16 << (8 - constrained_bits);
(base, (base as u16 + range_size - 1).min(255) as u8)
};
let octet_start = build_octet_range_arena_fa(min_val, max_val, current_state, &mut arena);
if octet_idx > 0 {
current_state = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
b".",
&[octet_start],
));
} else {
current_state = octet_start;
}
}
let open_quote_start = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
b"\"",
&[current_state],
));
(arena, open_quote_start)
}
fn make_ipv6_cidr_arena_fa(
network: &[u8; 16],
prefix_len: u8,
next_field: Arc<FieldMatcher>,
) -> (StateArena, StateId) {
let mut arena = StateArena::new();
let match_state = arena.alloc();
arena[match_state].field_transitions.push(next_field);
let term_state = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[ARENA_VALUE_TERMINATOR],
&[match_state],
));
let close_quote_state = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
b"\"",
&[term_state],
));
let mut current_state = close_quote_state;
for group_idx in (0..8).rev() {
let byte_idx = group_idx * 2;
let group_start_bit = group_idx * 16;
let group_end_bit = group_start_bit + 16;
let group_value = ((network[byte_idx] as u16) << 8) | (network[byte_idx + 1] as u16);
let (min_val, max_val) = if prefix_len as usize >= group_end_bit {
(group_value, group_value)
} else if (prefix_len as usize) <= group_start_bit {
(0u16, 0xffffu16)
} else {
let constrained_bits = prefix_len as usize - group_start_bit;
let mask = !0u16 << (16 - constrained_bits);
let base = group_value & mask;
let range_size = 1u32 << (16 - constrained_bits);
(base, (base as u32 + range_size - 1).min(0xffff) as u16)
};
let group_start =
build_ipv6_group_range_arena_fa(min_val, max_val, current_state, &mut arena);
if group_idx > 0 {
current_state = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
b":",
&[group_start],
));
} else {
current_state = group_start;
}
}
let open_quote_start = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
b"\"",
&[current_state],
));
(arena, open_quote_start)
}
fn build_octet_range_arena_fa(
min_val: u8,
max_val: u8,
continuation: StateId,
arena: &mut StateArena,
) -> StateId {
if min_val == max_val {
let val_str = min_val.to_string();
return build_literal_chain_arena(val_str.as_bytes(), continuation, arena);
}
let start = arena.alloc();
let mut value_starts = Vec::new();
for val in min_val..=max_val {
let val_str = val.to_string();
let val_start = build_literal_chain_arena(val_str.as_bytes(), continuation, arena);
value_starts.push(val_start);
}
arena[start].table.epsilons = SmallVec::from_vec(value_starts);
start
}
fn build_ipv6_group_range_arena_fa(
min_val: u16,
max_val: u16,
continuation: StateId,
arena: &mut StateArena,
) -> StateId {
if min_val == 0 && max_val == 0xffff {
return build_any_hex_group_arena(continuation, arena);
}
if min_val == max_val {
let val_str = format!("{:x}", min_val);
return build_literal_chain_arena(val_str.as_bytes(), continuation, arena);
}
let start = arena.alloc();
let mut value_starts = Vec::new();
for val in min_val..=max_val {
let val_str = format!("{:x}", val);
let val_start = build_literal_chain_arena(val_str.as_bytes(), continuation, arena);
value_starts.push(val_start);
}
arena[start].table.epsilons = SmallVec::from_vec(value_starts);
start
}
fn build_literal_chain_arena(val: &[u8], continuation: StateId, arena: &mut StateArena) -> StateId {
let mut current = continuation;
for &byte in val.iter().rev() {
current = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[byte],
&[current],
));
}
current
}
fn build_any_hex_group_arena(continuation: StateId, arena: &mut StateArena) -> StateId {
let hex_chars: Vec<u8> = (b'0'..=b'9')
.chain(b'a'..=b'f')
.chain(b'A'..=b'F')
.collect();
let mut current = continuation;
for digit_pos in (0..4).rev() {
let next_state = arena.alloc();
let mut bytes = Vec::new();
let mut targets = Vec::new();
for &b in &hex_chars {
bytes.push(b);
targets.push(current);
}
arena[next_state].table = ArenaSmallTable::with_mappings(StateId::NONE, &bytes, &targets);
if digit_pos > 0 {
arena[next_state].table.epsilons.push(continuation);
}
current = next_state;
}
current
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_state_id_none() {
assert!(StateId::NONE.is_none());
assert!(!StateId(0).is_none());
assert!(!StateId(100).is_none());
}
#[test]
fn test_arena_alloc() {
let mut arena = StateArena::new();
let id1 = arena.alloc();
let id2 = arena.alloc();
assert_eq!(id1.index(), 0);
assert_eq!(id2.index(), 1);
assert_eq!(arena.len(), 2);
}
#[test]
fn test_arena_cyclic_reference() {
let mut arena = StateArena::new();
let state_a = arena.alloc();
let state_b = arena.alloc();
arena[state_a].table.epsilons.push(state_b);
arena[state_b].table.epsilons.push(state_a);
assert_eq!(arena[state_a].table.epsilons[0], state_b);
assert_eq!(arena[state_b].table.epsilons[0], state_a);
}
#[test]
fn test_arena_small_table_pack() {
let mut table = ArenaSmallTable::new();
let mut unpacked = [StateId::NONE; BYTE_CEILING];
unpacked[b'a' as usize] = StateId(0);
unpacked[b'b' as usize] = StateId(1);
table.pack(&unpacked);
assert_eq!(table.dstep(b'a'), StateId(0));
assert_eq!(table.dstep(b'b'), StateId(1));
assert!(table.dstep(b'c').is_none());
}
#[test]
fn test_traverse_arena_nfa_simple() {
let mut arena = StateArena::new();
let field_matcher = Arc::new(FieldMatcher::new());
let final_state = arena.alloc();
arena[final_state]
.field_transitions
.push(field_matcher.clone());
let match_state = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[ARENA_VALUE_TERMINATOR],
&[final_state],
));
let start = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
b"a",
&[match_state],
));
arena.precompute_epsilon_closures();
let mut bufs = ArenaNfaBuffers::with_capacity();
let value = b"a";
traverse_arena_nfa(&arena, start, value, &mut bufs);
assert_eq!(bufs.transitions.len(), 1);
assert!(Arc::ptr_eq(&bufs.transitions[0], &field_matcher));
bufs.clear();
traverse_arena_nfa(&arena, start, b"b", &mut bufs);
assert!(bufs.transitions.is_empty());
}
#[test]
fn test_traverse_arena_nfa_star_cyclic() {
let mut arena = StateArena::new();
let field_matcher = Arc::new(FieldMatcher::new());
let final_state = arena.alloc();
arena[final_state]
.field_transitions
.push(field_matcher.clone());
let exit_state = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[ARENA_VALUE_TERMINATOR],
&[final_state],
));
let loopback = arena.alloc();
let start = arena.alloc_with_table({
let mut table =
ArenaSmallTable::with_mappings(StateId::NONE, b"ab", &[loopback, loopback]);
table.epsilons.push(exit_state);
table
});
arena[loopback].table.epsilons = smallvec![exit_state, start];
arena.precompute_epsilon_closures();
let mut bufs = ArenaNfaBuffers::with_capacity();
traverse_arena_nfa(&arena, start, b"", &mut bufs);
assert_eq!(bufs.transitions.len(), 1, "[ab]* should match empty string");
bufs.clear();
traverse_arena_nfa(&arena, start, b"a", &mut bufs);
assert_eq!(bufs.transitions.len(), 1, "[ab]* should match 'a'");
bufs.clear();
traverse_arena_nfa(&arena, start, b"ab", &mut bufs);
assert_eq!(bufs.transitions.len(), 1, "[ab]* should match 'ab'");
bufs.clear();
traverse_arena_nfa(&arena, start, b"aaa", &mut bufs);
assert_eq!(bufs.transitions.len(), 1, "[ab]* should match 'aaa'");
bufs.clear();
traverse_arena_nfa(&arena, start, b"abba", &mut bufs);
assert_eq!(bufs.transitions.len(), 1, "[ab]* should match 'abba'");
bufs.clear();
let long_value = "ab".repeat(100);
traverse_arena_nfa(&arena, start, long_value.as_bytes(), &mut bufs);
assert_eq!(bufs.transitions.len(), 1, "[ab]* should match long string");
bufs.clear();
traverse_arena_nfa(&arena, start, b"c", &mut bufs);
assert!(bufs.transitions.is_empty(), "[ab]* should NOT match 'c'");
}
#[test]
fn test_traverse_arena_nfa_plus_cyclic() {
let mut arena = StateArena::new();
let field_matcher = Arc::new(FieldMatcher::new());
let final_state = arena.alloc();
arena[final_state]
.field_transitions
.push(field_matcher.clone());
let exit_state = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[ARENA_VALUE_TERMINATOR],
&[final_state],
));
let loopback = arena.alloc();
let start = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
b"ab",
&[loopback, loopback],
));
arena[loopback].table.epsilons = smallvec![exit_state, start];
arena.precompute_epsilon_closures();
let mut bufs = ArenaNfaBuffers::with_capacity();
traverse_arena_nfa(&arena, start, b"", &mut bufs);
assert!(
bufs.transitions.is_empty(),
"[ab]+ should NOT match empty string"
);
bufs.clear();
traverse_arena_nfa(&arena, start, b"a", &mut bufs);
assert_eq!(bufs.transitions.len(), 1, "[ab]+ should match 'a'");
bufs.clear();
traverse_arena_nfa(&arena, start, b"ab", &mut bufs);
assert_eq!(bufs.transitions.len(), 1, "[ab]+ should match 'ab'");
bufs.clear();
let long_value = "ab".repeat(100);
traverse_arena_nfa(&arena, start, long_value.as_bytes(), &mut bufs);
assert_eq!(bufs.transitions.len(), 1, "[ab]+ should match long string");
}
#[test]
fn test_arena_state_count_vs_chain() {
let mut arena = StateArena::new();
let field_matcher = Arc::new(FieldMatcher::new());
let final_state = arena.alloc();
arena[final_state].field_transitions.push(field_matcher);
let exit_state = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[ARENA_VALUE_TERMINATOR],
&[final_state],
));
let loopback = arena.alloc();
let start = arena.alloc_with_table({
let mut table = ArenaSmallTable::with_mappings(StateId::NONE, b"a", &[loopback]);
table.epsilons.push(exit_state);
table
});
arena[loopback].table.epsilons = smallvec![exit_state, start];
assert_eq!(arena.len(), 4, "Arena [a]* should only need 4 states");
arena.precompute_epsilon_closures();
let mut bufs = ArenaNfaBuffers::with_capacity();
traverse_arena_nfa(&arena, start, b"aaaaaaaaaa", &mut bufs);
assert_eq!(bufs.transitions.len(), 1);
}
#[test]
fn test_ascii_fast_path_acceleration() {
let mut arena = StateArena::new();
let field_matcher = Arc::new(FieldMatcher::new());
let final_state = arena.alloc();
arena[final_state]
.field_transitions
.push(field_matcher.clone());
let exit_state = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[ARENA_VALUE_TERMINATOR],
&[final_state],
));
let loopback = arena.alloc();
let mut unpacked = [StateId::NONE; BYTE_CEILING];
for (byte, slot) in unpacked.iter_mut().enumerate() {
if byte != b'x' as usize {
*slot = loopback;
}
}
let mut start_table = ArenaSmallTable::new();
start_table.pack(&unpacked);
start_table.accel = Some(super::super::AccelInfo {
exit_bytes: [b'x', 0, 0],
len: 1,
});
let start = arena.alloc_with_table(start_table);
arena[loopback].table.epsilons = smallvec![exit_state, start];
arena[loopback].table.accel = Some(super::super::AccelInfo {
exit_bytes: [b'x', 0, 0],
len: 1,
});
arena.precompute_epsilon_closures();
let mut bufs = ArenaNfaBuffers::with_capacity();
let test_value = b"aaaaaaaaaaaaaaaaaaaaaaaaax";
traverse_arena_nfa(&arena, start, test_value, &mut bufs);
assert!(
bufs.transitions.is_empty(),
"[^x]+ should NOT match string ending with 'x'"
);
bufs.clear();
let test_value2 = b"aaaaaaaaaaaaaaaaaaaaaaaa";
traverse_arena_nfa(&arena, start, test_value2, &mut bufs);
assert_eq!(
bufs.transitions.len(),
1,
"[^x]+ should match string without 'x'"
);
bufs.clear();
let test_value3 = b"aaaaaaxaaaaaa";
traverse_arena_nfa(&arena, start, test_value3, &mut bufs);
assert!(
bufs.transitions.is_empty(),
"[^x]+ should NOT match string with 'x' in middle"
);
}
#[test]
fn test_try_accelerate_arena() {
let mut table = ArenaSmallTable::new();
assert!(try_accelerate_arena(&table, b"hello").is_none());
table.accel = Some(super::super::AccelInfo {
exit_bytes: [b'x', 0, 0],
len: 1,
});
assert_eq!(try_accelerate_arena(&table, b"helloxworld"), Some(5)); assert!(try_accelerate_arena(&table, b"hello").is_none());
table.accel = Some(super::super::AccelInfo {
exit_bytes: [b'x', b'y', 0],
len: 2,
});
assert!(try_accelerate_arena(&table, b"helloworld").is_none()); assert_eq!(try_accelerate_arena(&table, b"hellxyworld"), Some(4)); assert_eq!(try_accelerate_arena(&table, b"hellyxworld"), Some(4)); assert_eq!(try_accelerate_arena(&table, b"helloxyw"), Some(5));
table.accel = Some(super::super::AccelInfo {
exit_bytes: [b'x', b'y', b'z'],
len: 3,
});
assert_eq!(try_accelerate_arena(&table, b"abcdefghijz"), Some(10)); assert_eq!(try_accelerate_arena(&table, b"abcxyz"), Some(3)); assert!(try_accelerate_arena(&table, b"abcdefghij").is_none()); }
}
#[cfg(test)]
mod merge_tests {
use super::*;
fn make_single_byte_arena(byte: u8, fm: Arc<FieldMatcher>) -> (StateArena, StateId) {
let mut arena = StateArena::new();
let end = arena.alloc();
arena[end].field_transitions.push(fm);
let term = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[ARENA_VALUE_TERMINATOR],
&[end],
));
let start = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[byte],
&[term],
));
(arena, start)
}
#[test]
fn test_merge_empty_arenas() {
let arena1 = StateArena::new();
let arena2 = StateArena::new();
let (merged, start) = merge_arena_dfas(&arena1, StateId::NONE, &arena2, StateId::NONE);
assert!(
start.is_none(),
"Merging empty arenas should return NONE start"
);
assert!(merged.is_empty(), "Merged arena should be empty");
}
#[test]
fn test_merge_one_empty_arena() {
let fm = Arc::new(FieldMatcher::new());
let (arena1, start1) = make_single_byte_arena(b'a', fm.clone());
let (merged, start) = merge_arena_dfas(&arena1, start1, &StateArena::new(), StateId::NONE);
let mut bufs = ArenaNfaBuffers::with_capacity();
traverse_arena_nfa(&merged, start, b"a", &mut bufs);
assert_eq!(bufs.transitions.len(), 1, "Should match 'a'");
}
#[test]
fn test_merge_single_transition() {
let fm1 = Arc::new(FieldMatcher::new());
let fm2 = Arc::new(FieldMatcher::new());
let (arena1, start1) = make_single_byte_arena(b'a', fm1.clone());
let (arena2, start2) = make_single_byte_arena(b'b', fm2.clone());
let (merged, start) = merge_arena_dfas(&arena1, start1, &arena2, start2);
let mut bufs = ArenaNfaBuffers::with_capacity();
traverse_arena_nfa(&merged, start, b"a", &mut bufs);
assert_eq!(bufs.transitions.len(), 1, "Merged should match 'a'");
assert!(Arc::ptr_eq(&bufs.transitions[0], &fm1));
bufs.clear();
traverse_arena_nfa(&merged, start, b"b", &mut bufs);
assert_eq!(bufs.transitions.len(), 1, "Merged should match 'b'");
assert!(Arc::ptr_eq(&bufs.transitions[0], &fm2));
bufs.clear();
traverse_arena_nfa(&merged, start, b"c", &mut bufs);
assert!(bufs.transitions.is_empty(), "Merged should NOT match 'c'");
}
#[test]
fn test_merge_overlapping_transitions() {
let fm1 = Arc::new(FieldMatcher::new());
let fm2 = Arc::new(FieldMatcher::new());
let (arena1, start1) = make_single_byte_arena(b'a', fm1.clone());
let (arena2, start2) = make_single_byte_arena(b'a', fm2.clone());
let (merged, start) = merge_arena_dfas(&arena1, start1, &arena2, start2);
let mut bufs = ArenaNfaBuffers::with_capacity();
traverse_arena_nfa(&merged, start, b"a", &mut bufs);
assert_eq!(
bufs.transitions.len(),
2,
"Overlapping merge should have 2 transitions"
);
}
#[test]
fn test_merge_preserves_field_transitions() {
let fm1 = Arc::new(FieldMatcher::with_match_id(100));
let fm2 = Arc::new(FieldMatcher::with_match_id(200));
let (arena1, start1) = make_single_byte_arena(b'x', fm1.clone());
let (arena2, start2) = make_single_byte_arena(b'y', fm2.clone());
let (merged, start) = merge_arena_dfas(&arena1, start1, &arena2, start2);
let mut bufs = ArenaNfaBuffers::with_capacity();
traverse_arena_nfa(&merged, start, b"x", &mut bufs);
assert_eq!(bufs.transitions.len(), 1);
assert_eq!(bufs.transitions[0].match_id, Some(100));
bufs.clear();
traverse_arena_nfa(&merged, start, b"y", &mut bufs);
assert_eq!(bufs.transitions.len(), 1);
assert_eq!(bufs.transitions[0].match_id, Some(200));
}
#[test]
fn test_merge_multiple_arenas_associative() {
let fm_a = Arc::new(FieldMatcher::with_match_id(1));
let fm_b = Arc::new(FieldMatcher::with_match_id(2));
let fm_c = Arc::new(FieldMatcher::with_match_id(3));
let (arena_a, start_a) = make_single_byte_arena(b'a', fm_a.clone());
let (arena_b, start_b) = make_single_byte_arena(b'b', fm_b.clone());
let (arena_c, start_c) = make_single_byte_arena(b'c', fm_c.clone());
let (ab, ab_start) = merge_arena_dfas(&arena_a, start_a, &arena_b, start_b);
let (abc_left, abc_left_start) = merge_arena_dfas(&ab, ab_start, &arena_c, start_c);
let (bc, bc_start) = merge_arena_dfas(&arena_b, start_b, &arena_c, start_c);
let (abc_right, abc_right_start) = merge_arena_dfas(&arena_a, start_a, &bc, bc_start);
let mut bufs1 = ArenaNfaBuffers::with_capacity();
let mut bufs2 = ArenaNfaBuffers::with_capacity();
for byte in [b'a', b'b', b'c'] {
bufs1.clear();
bufs2.clear();
traverse_arena_nfa(&abc_left, abc_left_start, &[byte], &mut bufs1);
traverse_arena_nfa(&abc_right, abc_right_start, &[byte], &mut bufs2);
assert_eq!(
bufs1.transitions.len(),
bufs2.transitions.len(),
"Associativity: both should have same number of transitions for '{}'",
byte as char
);
assert_eq!(
bufs1.transitions.len(),
1,
"Should match '{}'",
byte as char
);
}
}
#[test]
fn test_merge_multi_byte_sequences() {
let fm1 = Arc::new(FieldMatcher::with_match_id(1));
let fm2 = Arc::new(FieldMatcher::with_match_id(2));
let (arena1, start1) = {
let mut arena = StateArena::new();
let end = arena.alloc();
arena[end].field_transitions.push(fm1.clone());
let term = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[ARENA_VALUE_TERMINATOR],
&[end],
));
let state_b = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
b"b",
&[term],
));
let start = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
b"a",
&[state_b],
));
(arena, start)
};
let (arena2, start2) = {
let mut arena = StateArena::new();
let end = arena.alloc();
arena[end].field_transitions.push(fm2.clone());
let term = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[ARENA_VALUE_TERMINATOR],
&[end],
));
let state_c = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
b"c",
&[term],
));
let start = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
b"a",
&[state_c],
));
(arena, start)
};
let (merged, start) = merge_arena_dfas(&arena1, start1, &arena2, start2);
let mut bufs = ArenaNfaBuffers::with_capacity();
traverse_arena_nfa(&merged, start, b"ab", &mut bufs);
assert_eq!(bufs.transitions.len(), 1, "Should match 'ab'");
assert_eq!(bufs.transitions[0].match_id, Some(1));
bufs.clear();
traverse_arena_nfa(&merged, start, b"ac", &mut bufs);
assert_eq!(bufs.transitions.len(), 1, "Should match 'ac'");
assert_eq!(bufs.transitions[0].match_id, Some(2));
for val in [b"a".as_slice(), b"ad", b"bc"] {
bufs.clear();
traverse_arena_nfa(&merged, start, val, &mut bufs);
assert!(
bufs.transitions.is_empty(),
"Should NOT match {:?}",
std::str::from_utf8(val)
);
}
}
}
#[cfg(test)]
mod numeric_arena_tests {
use super::*;
use crate::numbits::q_num_from_f64;
fn matches_arena(arena: &StateArena, start: StateId, q_num: &[u8]) -> bool {
let mut bufs = ArenaNfaBuffers::with_capacity();
traverse_arena_nfa(arena, start, q_num, &mut bufs);
!bufs.transitions.is_empty()
}
#[test]
fn test_numeric_less_arena_fa_basic() {
let next_field = Arc::new(FieldMatcher::new());
let (arena, start) = make_numeric_less_arena_fa(100.0, true, next_field.clone());
let q50 = q_num_from_f64(50.0);
let q100 = q_num_from_f64(100.0);
let q150 = q_num_from_f64(150.0);
let q0 = q_num_from_f64(0.0);
let q_neg = q_num_from_f64(-50.0);
assert!(matches_arena(&arena, start, &q50), "50 should match <= 100");
assert!(
matches_arena(&arena, start, &q100),
"100 should match <= 100 (inclusive)"
);
assert!(
!matches_arena(&arena, start, &q150),
"150 should NOT match <= 100"
);
assert!(matches_arena(&arena, start, &q0), "0 should match <= 100");
assert!(
matches_arena(&arena, start, &q_neg),
"-50 should match <= 100"
);
}
#[test]
fn test_numeric_less_arena_fa_exclusive() {
let next_field = Arc::new(FieldMatcher::new());
let (arena, start) = make_numeric_less_arena_fa(100.0, false, next_field.clone());
let q99 = q_num_from_f64(99.0);
let q100 = q_num_from_f64(100.0);
assert!(matches_arena(&arena, start, &q99), "99 should match < 100");
assert!(
!matches_arena(&arena, start, &q100),
"100 should NOT match < 100 (exclusive)"
);
}
#[test]
fn test_numeric_greater_arena_fa_basic() {
let next_field = Arc::new(FieldMatcher::new());
let (arena, start) = make_numeric_greater_arena_fa(100.0, true, next_field.clone());
let q50 = q_num_from_f64(50.0);
let q100 = q_num_from_f64(100.0);
let q150 = q_num_from_f64(150.0);
assert!(
!matches_arena(&arena, start, &q50),
"50 should NOT match >= 100"
);
assert!(
matches_arena(&arena, start, &q100),
"100 should match >= 100 (inclusive)"
);
assert!(
matches_arena(&arena, start, &q150),
"150 should match >= 100"
);
}
#[test]
fn test_numeric_greater_arena_fa_exclusive() {
let next_field = Arc::new(FieldMatcher::new());
let (arena, start) = make_numeric_greater_arena_fa(100.0, false, next_field.clone());
let q100 = q_num_from_f64(100.0);
let q101 = q_num_from_f64(101.0);
assert!(
!matches_arena(&arena, start, &q100),
"100 should NOT match > 100 (exclusive)"
);
assert!(
matches_arena(&arena, start, &q101),
"101 should match > 100"
);
}
#[test]
fn test_numeric_range_arena_fa_two_sided() {
let next_field = Arc::new(FieldMatcher::new());
let (arena, start) =
make_numeric_range_arena_fa(50.0, true, 150.0, true, next_field.clone());
let q25 = q_num_from_f64(25.0);
let q50 = q_num_from_f64(50.0);
let q100 = q_num_from_f64(100.0);
let q150 = q_num_from_f64(150.0);
let q200 = q_num_from_f64(200.0);
assert!(
!matches_arena(&arena, start, &q25),
"25 should NOT match [50, 150]"
);
assert!(
matches_arena(&arena, start, &q50),
"50 should match [50, 150]"
);
assert!(
matches_arena(&arena, start, &q100),
"100 should match [50, 150]"
);
assert!(
matches_arena(&arena, start, &q150),
"150 should match [50, 150]"
);
assert!(
!matches_arena(&arena, start, &q200),
"200 should NOT match [50, 150]"
);
}
#[test]
fn test_numeric_range_arena_fa_exclusive_bounds() {
let next_field = Arc::new(FieldMatcher::new());
let (arena, start) =
make_numeric_range_arena_fa(50.0, false, 150.0, false, next_field.clone());
let q50 = q_num_from_f64(50.0);
let q51 = q_num_from_f64(51.0);
let q149 = q_num_from_f64(149.0);
let q150 = q_num_from_f64(150.0);
assert!(
!matches_arena(&arena, start, &q50),
"50 should NOT match (50, 150)"
);
assert!(
matches_arena(&arena, start, &q51),
"51 should match (50, 150)"
);
assert!(
matches_arena(&arena, start, &q149),
"149 should match (50, 150)"
);
assert!(
!matches_arena(&arena, start, &q150),
"150 should NOT match (50, 150)"
);
}
#[test]
fn test_numeric_arena_fa_edge_cases() {
let next_field = Arc::new(FieldMatcher::new());
let (arena, start) = make_numeric_less_arena_fa(0.0, true, next_field.clone());
let q_neg = q_num_from_f64(-1.0);
let q0 = q_num_from_f64(0.0);
let q1 = q_num_from_f64(1.0);
assert!(matches_arena(&arena, start, &q_neg), "-1 should match <= 0");
assert!(matches_arena(&arena, start, &q0), "0 should match <= 0");
assert!(
!matches_arena(&arena, start, &q1),
"1 should NOT match <= 0"
);
let (arena2, start2) = make_numeric_greater_arena_fa(-100.0, true, next_field.clone());
let q_neg50 = q_num_from_f64(-50.0);
let q_neg100 = q_num_from_f64(-100.0);
let q_neg150 = q_num_from_f64(-150.0);
assert!(
matches_arena(&arena2, start2, &q_neg50),
"-50 should match >= -100"
);
assert!(
matches_arena(&arena2, start2, &q_neg100),
"-100 should match >= -100"
);
assert!(
!matches_arena(&arena2, start2, &q_neg150),
"-150 should NOT match >= -100"
);
}
#[test]
fn test_numeric_arena_fa_float_values() {
let next_field = Arc::new(FieldMatcher::new());
let (arena, start) = make_numeric_range_arena_fa(1.5, true, 2.5, true, next_field.clone());
let q1 = q_num_from_f64(1.0);
let q1_5 = q_num_from_f64(1.5);
let q2 = q_num_from_f64(2.0);
let q2_5 = q_num_from_f64(2.5);
let q3 = q_num_from_f64(3.0);
assert!(
!matches_arena(&arena, start, &q1),
"1.0 should NOT match [1.5, 2.5]"
);
assert!(
matches_arena(&arena, start, &q1_5),
"1.5 should match [1.5, 2.5]"
);
assert!(
matches_arena(&arena, start, &q2),
"2.0 should match [1.5, 2.5]"
);
assert!(
matches_arena(&arena, start, &q2_5),
"2.5 should match [1.5, 2.5]"
);
assert!(
!matches_arena(&arena, start, &q3),
"3.0 should NOT match [1.5, 2.5]"
);
}
#[test]
fn test_numeric_arena_fa_merge() {
let fm1 = Arc::new(FieldMatcher::with_match_id(1));
let fm2 = Arc::new(FieldMatcher::with_match_id(2));
let (arena1, start1) = make_numeric_less_arena_fa(50.0, false, fm1.clone());
let (arena2, start2) = make_numeric_greater_arena_fa(100.0, false, fm2.clone());
let (merged, merged_start) = merge_arena_dfas(&arena1, start1, &arena2, start2);
let q25 = q_num_from_f64(25.0);
let q75 = q_num_from_f64(75.0);
let q150 = q_num_from_f64(150.0);
let mut bufs = ArenaNfaBuffers::with_capacity();
traverse_arena_nfa(&merged, merged_start, &q25, &mut bufs);
assert_eq!(bufs.transitions.len(), 1, "25 should match merged FA");
assert_eq!(bufs.transitions[0].match_id, Some(1));
bufs.clear();
traverse_arena_nfa(&merged, merged_start, &q75, &mut bufs);
assert!(bufs.transitions.is_empty(), "75 should NOT match merged FA");
bufs.clear();
traverse_arena_nfa(&merged, merged_start, &q150, &mut bufs);
assert_eq!(bufs.transitions.len(), 1, "150 should match merged FA");
assert_eq!(bufs.transitions[0].match_id, Some(2));
}
#[test]
fn test_numeric_arena_fa_ordering_preserved() {
let next_field = Arc::new(FieldMatcher::new());
let test_values = vec![
-1000.0, -100.0, -10.0, -1.0, -0.5, 0.0, 0.5, 1.0, 10.0, 100.0, 1000.0,
];
for &bound in &test_values {
let (arena_less, start_less) =
make_numeric_less_arena_fa(bound, false, next_field.clone());
let (arena_greater, start_greater) =
make_numeric_greater_arena_fa(bound, false, next_field.clone());
for &val in &test_values {
let q_val = q_num_from_f64(val);
let matches_less = matches_arena(&arena_less, start_less, &q_val);
let matches_greater = matches_arena(&arena_greater, start_greater, &q_val);
if val < bound {
assert!(
matches_less,
"{} should match < {} (Q-number ordering)",
val, bound
);
assert!(
!matches_greater,
"{} should NOT match > {} (Q-number ordering)",
val, bound
);
} else if val > bound {
assert!(
!matches_less,
"{} should NOT match < {} (Q-number ordering)",
val, bound
);
assert!(
matches_greater,
"{} should match > {} (Q-number ordering)",
val, bound
);
} else {
assert!(
!matches_less,
"{} should NOT match < {} (exclusive)",
val, bound
);
assert!(
!matches_greater,
"{} should NOT match > {} (exclusive)",
val, bound
);
}
}
}
}
}
#[cfg(test)]
mod nfa_merge_tests {
use super::*;
fn matches_value(arena: &StateArena, start: StateId, value: &[u8]) -> bool {
let mut bufs = ArenaNfaBuffers::with_capacity();
traverse_arena_nfa(arena, start, value, &mut bufs);
!bufs.transitions.is_empty()
}
fn get_match_ids(arena: &StateArena, start: StateId, value: &[u8]) -> Vec<u64> {
let mut bufs = ArenaNfaBuffers::with_capacity();
traverse_arena_nfa(arena, start, value, &mut bufs);
bufs.transitions
.iter()
.filter_map(|fm| fm.match_id)
.collect()
}
fn make_epsilon_alternation_arena(
patterns: &[&[u8]],
fm: Arc<FieldMatcher>,
) -> (StateArena, StateId) {
let mut arena = StateArena::new();
let match_state = arena.alloc();
arena[match_state].field_transitions.push(fm);
let term_state = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[ARENA_VALUE_TERMINATOR],
&[match_state],
));
let mut branches = Vec::new();
for pattern in patterns {
if pattern.is_empty() {
branches.push(term_state);
} else {
let mut current = term_state;
for &byte in pattern.iter().rev() {
let state = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[byte],
&[current],
));
current = state;
}
branches.push(current);
}
}
let start = arena.alloc();
arena[start].table.epsilons = SmallVec::from_vec(branches);
arena.precompute_epsilon_closures();
(arena, start)
}
fn make_spinout_arena(
prefix: &[u8],
suffix: &[u8],
fm: Arc<FieldMatcher>,
) -> (StateArena, StateId) {
let mut arena = StateArena::new();
let match_state = arena.alloc();
arena[match_state].field_transitions.push(fm);
let term_state = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[ARENA_VALUE_TERMINATOR],
&[match_state],
));
let mut after_spinout = term_state;
for &byte in suffix.iter().rev() {
let state = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[byte],
&[after_spinout],
));
after_spinout = state;
}
let spinout_state = arena.alloc();
arena[spinout_state].table.spinout = spinout_state; arena[spinout_state].table.epsilons.push(after_spinout);
if !suffix.is_empty() {
let mut unpacked = [StateId::NONE; BYTE_CEILING];
unpacked[suffix[0] as usize] = after_spinout;
arena[spinout_state].table.pack(&unpacked);
}
let mut current = spinout_state;
let start = if prefix.is_empty() {
let start = arena.alloc();
arena[start].table.epsilons.push(spinout_state);
start
} else {
for &byte in prefix.iter().rev() {
let state = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[byte],
&[current],
));
current = state;
}
current
};
arena.precompute_epsilon_closures();
(arena, start)
}
#[test]
fn test_merge_arena_with_epsilons() {
let fm1 = Arc::new(FieldMatcher::with_match_id(1));
let (arena1, start1) = make_epsilon_alternation_arena(&[b"a", b"b"], fm1.clone());
let fm2 = Arc::new(FieldMatcher::with_match_id(2));
let (arena2, start2) = {
let mut arena = StateArena::new();
let end = arena.alloc();
arena[end].field_transitions.push(fm2.clone());
let term = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[ARENA_VALUE_TERMINATOR],
&[end],
));
let start = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
b"c",
&[term],
));
(arena, start)
};
let (merged, merged_start) = merge_arena_nfas(&arena1, start1, &arena2, start2);
let ids_a = get_match_ids(&merged, merged_start, b"a");
assert!(ids_a.contains(&1), "Merged should match 'a' (id=1)");
let ids_b = get_match_ids(&merged, merged_start, b"b");
assert!(ids_b.contains(&1), "Merged should match 'b' (id=1)");
let ids_c = get_match_ids(&merged, merged_start, b"c");
assert!(ids_c.contains(&2), "Merged should match 'c' (id=2)");
assert!(
!matches_value(&merged, merged_start, b"d"),
"Merged should NOT match 'd'"
);
}
#[test]
fn test_merge_arena_with_spinout() {
let fm1 = Arc::new(FieldMatcher::with_match_id(1));
let (arena1, start1) = make_spinout_arena(b"a", b"b", fm1.clone());
let fm2 = Arc::new(FieldMatcher::with_match_id(2));
let (arena2, start2) = make_spinout_arena(b"x", b"y", fm2.clone());
let (merged, merged_start) = merge_arena_nfas(&arena1, start1, &arena2, start2);
let ids_ab = get_match_ids(&merged, merged_start, b"ab");
assert!(ids_ab.contains(&1), "Merged should match 'ab'");
let ids_axxxb = get_match_ids(&merged, merged_start, b"aXXXb");
assert!(ids_axxxb.contains(&1), "Merged should match 'aXXXb'");
let ids_xy = get_match_ids(&merged, merged_start, b"xy");
assert!(ids_xy.contains(&2), "Merged should match 'xy'");
let ids_xzzzy = get_match_ids(&merged, merged_start, b"xZZZy");
assert!(ids_xzzzy.contains(&2), "Merged should match 'xZZZy'");
assert!(
!matches_value(&merged, merged_start, b"ac"),
"Merged should NOT match 'ac'"
);
}
#[test]
fn test_merge_arena_shellstyle_patterns() {
let fm1 = Arc::new(FieldMatcher::with_match_id(1));
let fm2 = Arc::new(FieldMatcher::with_match_id(2));
let (arena1, start1) = make_spinout_arena(b"foo", b"", fm1.clone());
let (arena2, start2) = make_spinout_arena(b"", b"bar", fm2.clone());
let (merged, merged_start) = merge_arena_nfas(&arena1, start1, &arena2, start2);
let ids_foo = get_match_ids(&merged, merged_start, b"foo");
assert!(ids_foo.contains(&1), "Merged should match 'foo'");
let ids_fooxyz = get_match_ids(&merged, merged_start, b"fooXYZ");
assert!(ids_fooxyz.contains(&1), "Merged should match 'fooXYZ'");
let ids_bar = get_match_ids(&merged, merged_start, b"bar");
assert!(ids_bar.contains(&2), "Merged should match 'bar'");
let ids_xyzbar = get_match_ids(&merged, merged_start, b"XYZbar");
assert!(ids_xyzbar.contains(&2), "Merged should match 'XYZbar'");
let ids_foobar = get_match_ids(&merged, merged_start, b"foobar");
assert!(
ids_foobar.contains(&1) && ids_foobar.contains(&2),
"Merged should match 'foobar' with both patterns"
);
}
#[test]
fn test_merge_arena_preserves_cycles() {
let fm1 = Arc::new(FieldMatcher::with_match_id(1));
let (arena1, start1) = {
let mut arena = StateArena::new();
let match_state = arena.alloc();
arena[match_state].field_transitions.push(fm1.clone());
let term_state = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[ARENA_VALUE_TERMINATOR],
&[match_state],
));
let loopback = arena.alloc();
let start = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
b"ab",
&[loopback, loopback],
));
arena[loopback].table.epsilons = smallvec![term_state, start];
(arena, start)
};
let fm2 = Arc::new(FieldMatcher::with_match_id(2));
let (arena2, start2) = {
let mut arena = StateArena::new();
let end = arena.alloc();
arena[end].field_transitions.push(fm2.clone());
let term = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[ARENA_VALUE_TERMINATOR],
&[end],
));
let start = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
b"c",
&[term],
));
(arena, start)
};
let (merged, merged_start) = merge_arena_nfas(&arena1, start1, &arena2, start2);
assert!(
matches_value(&merged, merged_start, b"a"),
"Merged cycle should match 'a'"
);
assert!(
matches_value(&merged, merged_start, b"b"),
"Merged cycle should match 'b'"
);
assert!(
matches_value(&merged, merged_start, b"ab"),
"Merged cycle should match 'ab'"
);
assert!(
matches_value(&merged, merged_start, b"aba"),
"Merged cycle should match 'aba'"
);
assert!(
matches_value(&merged, merged_start, b"abba"),
"Merged cycle should match 'abba'"
);
assert!(
matches_value(&merged, merged_start, b"aaabbb"),
"Merged cycle should match 'aaabbb'"
);
let long_ab = "ab".repeat(50);
assert!(
matches_value(&merged, merged_start, long_ab.as_bytes()),
"Merged cycle should match long 'abab...' pattern"
);
assert!(
matches_value(&merged, merged_start, b"c"),
"Merged should match 'c'"
);
assert!(
!matches_value(&merged, merged_start, b"d"),
"Merged should NOT match 'd'"
);
assert!(
!matches_value(&merged, merged_start, b""),
"[ab]+ should NOT match empty string"
);
}
#[test]
fn test_merge_arena_both_have_spinouts() {
let fm1 = Arc::new(FieldMatcher::with_match_id(1));
let (arena1, start1) = {
let mut arena = StateArena::new();
let match_state = arena.alloc();
arena[match_state].field_transitions.push(fm1.clone());
let term_state = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[ARENA_VALUE_TERMINATOR],
&[match_state],
));
let spinout2 = arena.alloc();
arena[spinout2].table.spinout = spinout2;
arena[spinout2].table.epsilons.push(term_state);
let x_state = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
b"X",
&[spinout2],
));
let spinout1 = arena.alloc();
arena[spinout1].table.spinout = spinout1;
arena[spinout1].table.epsilons.push(x_state);
let mut unpacked = [StateId::NONE; BYTE_CEILING];
unpacked[b'X' as usize] = spinout2;
arena[spinout1].table.pack(&unpacked);
let start = arena.alloc();
arena[start].table.epsilons.push(spinout1);
(arena, start)
};
let fm2 = Arc::new(FieldMatcher::with_match_id(2));
let (arena2, start2) = {
let mut arena = StateArena::new();
let match_state = arena.alloc();
arena[match_state].field_transitions.push(fm2.clone());
let term_state = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
&[ARENA_VALUE_TERMINATOR],
&[match_state],
));
let spinout2 = arena.alloc();
arena[spinout2].table.spinout = spinout2;
arena[spinout2].table.epsilons.push(term_state);
let y_state = arena.alloc_with_table(ArenaSmallTable::with_mappings(
StateId::NONE,
b"Y",
&[spinout2],
));
let spinout1 = arena.alloc();
arena[spinout1].table.spinout = spinout1;
arena[spinout1].table.epsilons.push(y_state);
let mut unpacked = [StateId::NONE; BYTE_CEILING];
unpacked[b'Y' as usize] = spinout2;
arena[spinout1].table.pack(&unpacked);
let start = arena.alloc();
arena[start].table.epsilons.push(spinout1);
(arena, start)
};
let (merged, merged_start) = merge_arena_nfas(&arena1, start1, &arena2, start2);
assert!(
get_match_ids(&merged, merged_start, b"X").contains(&1),
"Should match 'X'"
);
assert!(
get_match_ids(&merged, merged_start, b"aXb").contains(&1),
"Should match 'aXb'"
);
assert!(
get_match_ids(&merged, merged_start, b"aaaXbbb").contains(&1),
"Should match 'aaaXbbb'"
);
assert!(
get_match_ids(&merged, merged_start, b"Y").contains(&2),
"Should match 'Y'"
);
assert!(
get_match_ids(&merged, merged_start, b"aYb").contains(&2),
"Should match 'aYb'"
);
let ids_xy = get_match_ids(&merged, merged_start, b"XY");
assert!(
ids_xy.contains(&1) && ids_xy.contains(&2),
"Should match 'XY' with both patterns"
);
let ids_yax = get_match_ids(&merged, merged_start, b"YaX");
assert!(
ids_yax.contains(&1) && ids_yax.contains(&2),
"Should match 'YaX' with both patterns"
);
assert!(
!matches_value(&merged, merged_start, b"abc"),
"Should NOT match 'abc'"
);
}
#[test]
fn test_merge_arena_nfas_empty_cases() {
let fm = Arc::new(FieldMatcher::new());
let (arena1, start1) = make_epsilon_alternation_arena(&[b"a"], fm.clone());
let (merged, merged_start) =
merge_arena_nfas(&arena1, start1, &StateArena::new(), StateId::NONE);
assert!(
matches_value(&merged, merged_start, b"a"),
"Merging with empty should preserve original"
);
let (merged2, merged_start2) =
merge_arena_nfas(&StateArena::new(), StateId::NONE, &arena1, start1);
assert!(
matches_value(&merged2, merged_start2, b"a"),
"Merging empty with non-empty should preserve original"
);
let (_merged3, merged_start3) = merge_arena_nfas(
&StateArena::new(),
StateId::NONE,
&StateArena::new(),
StateId::NONE,
);
assert!(
merged_start3.is_none(),
"Merging two empty arenas should return NONE"
);
}
#[test]
fn test_flatten_epsilon_targets_on_repeated_merge() {
let fm = Arc::new(FieldMatcher::new());
let (a1, s1) = make_epsilon_alternation_arena(&[b"a"], fm.clone());
let (a2, s2) = make_epsilon_alternation_arena(&[b"b"], fm.clone());
let (a3, s3) = make_epsilon_alternation_arena(&[b"c"], fm.clone());
let (a4, s4) = make_epsilon_alternation_arena(&[b"d"], fm.clone());
let (m12, s12) = merge_arena_nfas(&a1, s1, &a2, s2);
let (m123, s123) = merge_arena_nfas(&m12, s12, &a3, s3);
let (m1234, s1234) = merge_arena_nfas(&m123, s123, &a4, s4);
assert!(matches_value(&m1234, s1234, b"a"), "should match 'a'");
assert!(matches_value(&m1234, s1234, b"b"), "should match 'b'");
assert!(matches_value(&m1234, s1234, b"c"), "should match 'c'");
assert!(matches_value(&m1234, s1234, b"d"), "should match 'd'");
assert!(!matches_value(&m1234, s1234, b"e"), "should not match 'e'");
let start_state = &m1234[s1234];
let eps_count = start_state.table.epsilons.len();
assert!(
eps_count > 2,
"Flattening should produce > 2 direct epsilon targets, got {eps_count}"
);
}
}
#[cfg(test)]
mod string_arena_tests {
use super::*;
fn matches_value(arena: &StateArena, start: StateId, value: &[u8]) -> bool {
let mut bufs = ArenaNfaBuffers::with_capacity();
traverse_arena_nfa(arena, start, value, &mut bufs);
!bufs.transitions.is_empty()
}
#[test]
fn test_string_arena_fa_basic() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let (arena, start) = make_string_arena_fa(b"hello", fm.clone());
assert!(
matches_value(&arena, start, b"hello"),
"Should match 'hello'"
);
assert!(
!matches_value(&arena, start, b"hell"),
"Should NOT match 'hell' (prefix)"
);
assert!(
!matches_value(&arena, start, b"helloworld"),
"Should NOT match 'helloworld' (longer)"
);
assert!(
!matches_value(&arena, start, b"world"),
"Should NOT match 'world'"
);
assert!(
!matches_value(&arena, start, b""),
"Should NOT match empty string"
);
}
#[test]
fn test_string_arena_fa_empty_string() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let (arena, start) = make_string_arena_fa(b"", fm.clone());
assert!(
matches_value(&arena, start, b""),
"Should match empty string"
);
assert!(!matches_value(&arena, start, b"a"), "Should NOT match 'a'");
}
#[test]
fn test_string_arena_fa_single_char() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let (arena, start) = make_string_arena_fa(b"x", fm.clone());
assert!(matches_value(&arena, start, b"x"), "Should match 'x'");
assert!(!matches_value(&arena, start, b"y"), "Should NOT match 'y'");
assert!(
!matches_value(&arena, start, b"xy"),
"Should NOT match 'xy'"
);
}
#[test]
fn test_string_arena_fa_utf8() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let (arena, start) = make_string_arena_fa("café".as_bytes(), fm.clone());
assert!(
matches_value(&arena, start, "café".as_bytes()),
"Should match 'café'"
);
assert!(
!matches_value(&arena, start, b"cafe"),
"Should NOT match 'cafe'"
);
}
#[test]
fn test_string_arena_fa_merge() {
let fm1 = Arc::new(FieldMatcher::with_match_id(1));
let fm2 = Arc::new(FieldMatcher::with_match_id(2));
let (arena1, start1) = make_string_arena_fa(b"foo", fm1.clone());
let (arena2, start2) = make_string_arena_fa(b"bar", fm2.clone());
let (merged, merged_start) = merge_arena_dfas(&arena1, start1, &arena2, start2);
assert!(
matches_value(&merged, merged_start, b"foo"),
"Merged should match 'foo'"
);
assert!(
matches_value(&merged, merged_start, b"bar"),
"Merged should match 'bar'"
);
assert!(
!matches_value(&merged, merged_start, b"baz"),
"Merged should NOT match 'baz'"
);
}
#[test]
fn test_string_arena_fa_merge_common_prefix() {
let fm1 = Arc::new(FieldMatcher::with_match_id(1));
let fm2 = Arc::new(FieldMatcher::with_match_id(2));
let (arena1, start1) = make_string_arena_fa(b"prefix_one", fm1.clone());
let (arena2, start2) = make_string_arena_fa(b"prefix_two", fm2.clone());
let (merged, merged_start) = merge_arena_dfas(&arena1, start1, &arena2, start2);
assert!(
matches_value(&merged, merged_start, b"prefix_one"),
"Merged should match 'prefix_one'"
);
assert!(
matches_value(&merged, merged_start, b"prefix_two"),
"Merged should match 'prefix_two'"
);
assert!(
!matches_value(&merged, merged_start, b"prefix"),
"Merged should NOT match 'prefix'"
);
assert!(
!matches_value(&merged, merged_start, b"prefix_"),
"Merged should NOT match 'prefix_'"
);
}
}
#[cfg(test)]
mod prefix_arena_tests {
use super::*;
fn matches_value(arena: &StateArena, start: StateId, value: &[u8]) -> bool {
let mut bufs = ArenaNfaBuffers::with_capacity();
traverse_arena_nfa(arena, start, value, &mut bufs);
!bufs.transitions.is_empty()
}
#[test]
fn test_prefix_arena_fa_basic() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let (arena, start) = make_prefix_arena_fa(b"hello", fm.clone());
assert!(
matches_value(&arena, start, b"hello"),
"Should match 'hello'"
);
assert!(
matches_value(&arena, start, b"helloworld"),
"Should match 'helloworld'"
);
assert!(
matches_value(&arena, start, b"hello_test"),
"Should match 'hello_test'"
);
assert!(
matches_value(&arena, start, b"hello123"),
"Should match 'hello123'"
);
assert!(
!matches_value(&arena, start, b"hell"),
"Should NOT match 'hell' (shorter than prefix)"
);
assert!(
!matches_value(&arena, start, b"world"),
"Should NOT match 'world'"
);
assert!(
!matches_value(&arena, start, b""),
"Should NOT match empty string"
);
}
#[test]
fn test_prefix_arena_fa_empty_prefix() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let (arena, start) = make_prefix_arena_fa(b"", fm.clone());
assert!(
matches_value(&arena, start, b""),
"Should match empty string"
);
assert!(
matches_value(&arena, start, b"anything"),
"Should match 'anything'"
);
assert!(
matches_value(&arena, start, b"hello world"),
"Should match 'hello world'"
);
}
#[test]
fn test_prefix_arena_fa_single_char() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let (arena, start) = make_prefix_arena_fa(b"a", fm.clone());
assert!(matches_value(&arena, start, b"a"), "Should match 'a'");
assert!(matches_value(&arena, start, b"abc"), "Should match 'abc'");
assert!(!matches_value(&arena, start, b"b"), "Should NOT match 'b'");
assert!(
!matches_value(&arena, start, b""),
"Should NOT match empty string"
);
}
#[test]
fn test_prefix_arena_fa_utf8() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let (arena, start) = make_prefix_arena_fa("caf".as_bytes(), fm.clone());
assert!(
matches_value(&arena, start, "café".as_bytes()),
"Should match 'café'"
);
assert!(
matches_value(&arena, start, "cafeteria".as_bytes()),
"Should match 'cafeteria'"
);
assert!(
!matches_value(&arena, start, "ca".as_bytes()),
"Should NOT match 'ca'"
);
}
#[test]
fn test_prefix_arena_fa_merge() {
let fm1 = Arc::new(FieldMatcher::with_match_id(1));
let fm2 = Arc::new(FieldMatcher::with_match_id(2));
let (arena1, start1) = make_prefix_arena_fa(b"foo", fm1.clone());
let (arena2, start2) = make_prefix_arena_fa(b"bar", fm2.clone());
let (merged, merged_start) = merge_arena_dfas(&arena1, start1, &arena2, start2);
assert!(
matches_value(&merged, merged_start, b"foo"),
"Merged should match 'foo'"
);
assert!(
matches_value(&merged, merged_start, b"foobar"),
"Merged should match 'foobar'"
);
assert!(
matches_value(&merged, merged_start, b"bar"),
"Merged should match 'bar'"
);
assert!(
matches_value(&merged, merged_start, b"barfoo"),
"Merged should match 'barfoo'"
);
assert!(
!matches_value(&merged, merged_start, b"baz"),
"Merged should NOT match 'baz'"
);
}
}
#[cfg(test)]
mod shellstyle_arena_tests {
use super::*;
fn matches_value(arena: &StateArena, start: StateId, value: &[u8]) -> bool {
let mut bufs = ArenaNfaBuffers::with_capacity();
traverse_arena_nfa(arena, start, value, &mut bufs);
!bufs.transitions.is_empty()
}
#[test]
fn test_shellstyle_arena_fa_prefix_wildcard() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let (arena, start) = make_shellstyle_arena_fa(b"foo*", fm.clone());
assert!(matches_value(&arena, start, b"foo"), "Should match 'foo'");
assert!(
matches_value(&arena, start, b"foobar"),
"Should match 'foobar'"
);
assert!(
matches_value(&arena, start, b"foo123"),
"Should match 'foo123'"
);
assert!(
!matches_value(&arena, start, b"fo"),
"Should NOT match 'fo'"
);
assert!(
!matches_value(&arena, start, b"bar"),
"Should NOT match 'bar'"
);
}
#[test]
fn test_shellstyle_arena_fa_suffix_wildcard() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let (arena, start) = make_shellstyle_arena_fa(b"*bar", fm.clone());
assert!(matches_value(&arena, start, b"bar"), "Should match 'bar'");
assert!(
matches_value(&arena, start, b"foobar"),
"Should match 'foobar'"
);
assert!(
matches_value(&arena, start, b"123bar"),
"Should match '123bar'"
);
assert!(
!matches_value(&arena, start, b"ba"),
"Should NOT match 'ba'"
);
assert!(
!matches_value(&arena, start, b"baz"),
"Should NOT match 'baz'"
);
}
#[test]
fn test_shellstyle_arena_fa_infix_wildcard() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let (arena, start) = make_shellstyle_arena_fa(b"foo*bar", fm.clone());
assert!(
matches_value(&arena, start, b"foobar"),
"Should match 'foobar'"
);
assert!(
matches_value(&arena, start, b"foo_bar"),
"Should match 'foo_bar'"
);
assert!(
matches_value(&arena, start, b"foo123bar"),
"Should match 'foo123bar'"
);
assert!(
!matches_value(&arena, start, b"foo"),
"Should NOT match 'foo'"
);
assert!(
!matches_value(&arena, start, b"bar"),
"Should NOT match 'bar'"
);
assert!(
!matches_value(&arena, start, b"foobaz"),
"Should NOT match 'foobaz'"
);
}
#[test]
fn test_shellstyle_arena_fa_no_wildcard() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let (arena, start) = make_shellstyle_arena_fa(b"hello", fm.clone());
assert!(
matches_value(&arena, start, b"hello"),
"Should match 'hello'"
);
assert!(
!matches_value(&arena, start, b"helloworld"),
"Should NOT match 'helloworld'"
);
assert!(
!matches_value(&arena, start, b"hell"),
"Should NOT match 'hell'"
);
}
#[test]
fn test_shellstyle_arena_fa_only_wildcard() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let (arena, start) = make_shellstyle_arena_fa(b"*", fm.clone());
assert!(matches_value(&arena, start, b""), "Should match empty");
assert!(
matches_value(&arena, start, b"anything"),
"Should match 'anything'"
);
assert!(
matches_value(&arena, start, b"foo bar baz"),
"Should match 'foo bar baz'"
);
}
#[test]
fn test_shellstyle_arena_fa_double_wildcard() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let (arena, start) = make_shellstyle_arena_fa(b"*foo*", fm.clone());
assert!(matches_value(&arena, start, b"foo"), "Should match 'foo'");
assert!(
matches_value(&arena, start, b"foobar"),
"Should match 'foobar'"
);
assert!(
matches_value(&arena, start, b"barfoo"),
"Should match 'barfoo'"
);
assert!(
matches_value(&arena, start, b"barfoobaz"),
"Should match 'barfoobaz'"
);
assert!(
!matches_value(&arena, start, b"bar"),
"Should NOT match 'bar'"
);
}
#[test]
fn test_shellstyle_arena_fa_merge() {
let fm1 = Arc::new(FieldMatcher::with_match_id(1));
let fm2 = Arc::new(FieldMatcher::with_match_id(2));
let (arena1, start1) = make_shellstyle_arena_fa(b"foo*", fm1.clone());
let (arena2, start2) = make_shellstyle_arena_fa(b"*bar", fm2.clone());
let (merged, merged_start) = merge_arena_nfas(&arena1, start1, &arena2, start2);
assert!(
matches_value(&merged, merged_start, b"foo"),
"Merged should match 'foo'"
);
assert!(
matches_value(&merged, merged_start, b"bar"),
"Merged should match 'bar'"
);
assert!(
matches_value(&merged, merged_start, b"foobar"),
"Merged should match 'foobar'"
);
assert!(
!matches_value(&merged, merged_start, b"baz"),
"Merged should NOT match 'baz'"
);
}
}
#[cfg(test)]
mod wildcard_arena_tests {
use super::*;
fn matches_value(arena: &StateArena, start: StateId, value: &[u8]) -> bool {
let mut bufs = ArenaNfaBuffers::with_capacity();
traverse_arena_nfa(arena, start, value, &mut bufs);
!bufs.transitions.is_empty()
}
#[test]
fn test_wildcard_arena_fa_basic() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let (arena, start) = make_wildcard_arena_fa(b"foo*bar", fm.clone());
assert!(
matches_value(&arena, start, b"foobar"),
"Should match 'foobar'"
);
assert!(
matches_value(&arena, start, b"foo123bar"),
"Should match 'foo123bar'"
);
assert!(
!matches_value(&arena, start, b"foo"),
"Should NOT match 'foo'"
);
}
#[test]
fn test_wildcard_arena_fa_escape_star() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let (arena, start) = make_wildcard_arena_fa(b"foo\\*bar", fm.clone());
assert!(
matches_value(&arena, start, b"foo*bar"),
"Should match 'foo*bar'"
);
assert!(
!matches_value(&arena, start, b"foobar"),
"Should NOT match 'foobar'"
);
assert!(
!matches_value(&arena, start, b"foo123bar"),
"Should NOT match 'foo123bar'"
);
}
#[test]
fn test_wildcard_arena_fa_escape_backslash() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let (arena, start) = make_wildcard_arena_fa(b"foo\\\\bar", fm.clone());
assert!(
matches_value(&arena, start, b"foo\\bar"),
"Should match 'foo\\bar'"
);
assert!(
!matches_value(&arena, start, b"foobar"),
"Should NOT match 'foobar'"
);
}
#[test]
fn test_wildcard_arena_fa_escape_with_wildcard() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let (arena, start) = make_wildcard_arena_fa(b"foo\\\\*bar", fm.clone());
assert!(
matches_value(&arena, start, b"foo\\bar"),
"Should match 'foo\\bar'"
);
assert!(
matches_value(&arena, start, b"foo\\123bar"),
"Should match 'foo\\123bar'"
);
assert!(
!matches_value(&arena, start, b"foobar"),
"Should NOT match 'foobar'"
);
}
#[test]
fn test_wildcard_arena_fa_star_at_end_with_escape() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let (arena, start) = make_wildcard_arena_fa(b"foo\\**", fm.clone());
assert!(matches_value(&arena, start, b"foo*"), "Should match 'foo*'");
assert!(
matches_value(&arena, start, b"foo*bar"),
"Should match 'foo*bar'"
);
assert!(
!matches_value(&arena, start, b"foo"),
"Should NOT match 'foo'"
);
assert!(
!matches_value(&arena, start, b"foobar"),
"Should NOT match 'foobar'"
);
}
#[test]
fn test_wildcard_arena_fa_no_escape() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let (arena, start) = make_wildcard_arena_fa(b"hello", fm.clone());
assert!(
matches_value(&arena, start, b"hello"),
"Should match 'hello'"
);
assert!(
!matches_value(&arena, start, b"helloworld"),
"Should NOT match 'helloworld'"
);
}
#[test]
fn test_wildcard_arena_fa_merge() {
let fm1 = Arc::new(FieldMatcher::with_match_id(1));
let fm2 = Arc::new(FieldMatcher::with_match_id(2));
let (arena1, start1) = make_wildcard_arena_fa(b"foo\\*", fm1.clone());
let (arena2, start2) = make_wildcard_arena_fa(b"bar*", fm2.clone());
let (merged, merged_start) = merge_arena_nfas(&arena1, start1, &arena2, start2);
assert!(
matches_value(&merged, merged_start, b"foo*"),
"Merged should match 'foo*'"
);
assert!(
matches_value(&merged, merged_start, b"bar"),
"Merged should match 'bar'"
);
assert!(
!matches_value(&merged, merged_start, b"baz"),
"Merged should NOT match 'baz'"
);
}
}
#[cfg(test)]
mod anything_but_arena_tests {
use super::*;
fn matches_value(arena: &StateArena, start: StateId, value: &[u8]) -> bool {
let mut bufs = ArenaNfaBuffers::with_capacity();
traverse_arena_nfa(arena, start, value, &mut bufs);
!bufs.transitions.is_empty()
}
#[test]
fn test_anything_but_arena_fa_single_value() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let excluded = vec![b"foo".to_vec()];
let (arena, start) = make_anything_but_arena_fa(&excluded, fm.clone());
assert!(
!matches_value(&arena, start, b"foo"),
"Should NOT match excluded 'foo'"
);
assert!(matches_value(&arena, start, b"bar"), "Should match 'bar'");
assert!(
matches_value(&arena, start, b"foobar"),
"Should match 'foobar' (longer than excluded)"
);
assert!(matches_value(&arena, start, b"fo"), "Should match 'fo'");
assert!(matches_value(&arena, start, b""), "Should match empty");
}
#[test]
fn test_anything_but_arena_fa_multiple_values() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let excluded = vec![b"foo".to_vec(), b"bar".to_vec()];
let (arena, start) = make_anything_but_arena_fa(&excluded, fm.clone());
assert!(
!matches_value(&arena, start, b"foo"),
"Should NOT match excluded 'foo'"
);
assert!(
!matches_value(&arena, start, b"bar"),
"Should NOT match excluded 'bar'"
);
assert!(matches_value(&arena, start, b"baz"), "Should match 'baz'");
assert!(
matches_value(&arena, start, b"foobar"),
"Should match 'foobar'"
);
}
#[test]
fn test_anything_but_arena_fa_common_prefix() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let excluded = vec![b"foo".to_vec(), b"foobar".to_vec()];
let (arena, start) = make_anything_but_arena_fa(&excluded, fm.clone());
assert!(
!matches_value(&arena, start, b"foo"),
"Should NOT match excluded 'foo'"
);
assert!(
!matches_value(&arena, start, b"foobar"),
"Should NOT match excluded 'foobar'"
);
assert!(matches_value(&arena, start, b"fo"), "Should match 'fo'");
assert!(matches_value(&arena, start, b"foob"), "Should match 'foob'");
assert!(
matches_value(&arena, start, b"foobarbaz"),
"Should match 'foobarbaz'"
);
}
#[test]
fn test_anything_but_arena_fa_empty_excluded() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let excluded: Vec<Vec<u8>> = vec![];
let (arena, start) = make_anything_but_arena_fa(&excluded, fm.clone());
assert!(
matches_value(&arena, start, b"anything"),
"Should match 'anything'"
);
assert!(matches_value(&arena, start, b""), "Should match empty");
}
#[test]
fn test_anything_but_arena_fa_merge() {
let fm1 = Arc::new(FieldMatcher::with_match_id(1));
let fm2 = Arc::new(FieldMatcher::with_match_id(2));
let (arena1, start1) = make_anything_but_arena_fa(&[b"foo".to_vec()], fm1.clone());
let (arena2, start2) = make_string_arena_fa(b"bar", fm2.clone());
let (merged, merged_start) = merge_arena_nfas(&arena1, start1, &arena2, start2);
assert!(
!matches_value(&merged, merged_start, b"foo"),
"Merged should NOT match 'foo'"
);
assert!(
matches_value(&merged, merged_start, b"bar"),
"Merged should match 'bar'"
);
assert!(
matches_value(&merged, merged_start, b"baz"),
"Merged should match 'baz'"
);
}
}
#[cfg(test)]
mod monocase_arena_tests {
use super::*;
fn matches_value(arena: &StateArena, start: StateId, value: &[u8]) -> bool {
let mut bufs = ArenaNfaBuffers::with_capacity();
traverse_arena_nfa(arena, start, value, &mut bufs);
!bufs.transitions.is_empty()
}
#[test]
fn test_monocase_arena_fa_single_char() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let (arena, start) = make_monocase_arena_fa(b"A", fm.clone());
assert!(matches_value(&arena, start, b"A"), "Should match 'A'");
assert!(matches_value(&arena, start, b"a"), "Should match 'a'");
assert!(!matches_value(&arena, start, b"B"), "Should NOT match 'B'");
}
#[test]
fn test_monocase_arena_fa_two_chars() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let (arena, start) = make_monocase_arena_fa(b"Ab", fm.clone());
assert!(matches_value(&arena, start, b"Ab"), "Should match 'Ab'");
assert!(matches_value(&arena, start, b"ab"), "Should match 'ab'");
assert!(matches_value(&arena, start, b"AB"), "Should match 'AB'");
assert!(matches_value(&arena, start, b"aB"), "Should match 'aB'");
assert!(
!matches_value(&arena, start, b"Ac"),
"Should NOT match 'Ac'"
);
}
#[test]
fn test_monocase_arena_fa_three_chars() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let (arena, start) = make_monocase_arena_fa(b"cat", fm.clone());
assert!(matches_value(&arena, start, b"cat"), "Should match 'cat'");
assert!(matches_value(&arena, start, b"CAT"), "Should match 'CAT'");
assert!(matches_value(&arena, start, b"Cat"), "Should match 'Cat'");
}
#[test]
fn test_monocase_arena_fa_basic_ascii() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let (arena, start) = make_monocase_arena_fa(b"Hello", fm.clone());
assert!(
matches_value(&arena, start, b"Hello"),
"Should match 'Hello'"
);
assert!(
matches_value(&arena, start, b"hello"),
"Should match 'hello'"
);
assert!(
matches_value(&arena, start, b"HELLO"),
"Should match 'HELLO'"
);
assert!(
matches_value(&arena, start, b"hElLo"),
"Should match 'hElLo'"
);
assert!(
!matches_value(&arena, start, b"world"),
"Should NOT match 'world'"
);
assert!(
!matches_value(&arena, start, b"hell"),
"Should NOT match 'hell'"
);
}
#[test]
fn test_monocase_arena_fa_empty() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let (arena, start) = make_monocase_arena_fa(b"", fm.clone());
assert!(matches_value(&arena, start, b""), "Should match empty");
assert!(!matches_value(&arena, start, b"a"), "Should NOT match 'a'");
}
#[test]
fn test_monocase_arena_fa_no_case_chars() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let (arena, start) = make_monocase_arena_fa(b"123", fm.clone());
assert!(matches_value(&arena, start, b"123"), "Should match '123'");
assert!(
!matches_value(&arena, start, b"456"),
"Should NOT match '456'"
);
}
#[test]
fn test_monocase_arena_fa_mixed_ascii() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let (arena, start) = make_monocase_arena_fa(b"Abc123", fm.clone());
assert!(
matches_value(&arena, start, b"Abc123"),
"Should match 'Abc123'"
);
assert!(
matches_value(&arena, start, b"abc123"),
"Should match 'abc123'"
);
assert!(
matches_value(&arena, start, b"ABC123"),
"Should match 'ABC123'"
);
assert!(
!matches_value(&arena, start, b"Abc124"),
"Should NOT match 'Abc124'"
);
}
#[test]
fn test_monocase_arena_fa_merge() {
let fm1 = Arc::new(FieldMatcher::with_match_id(1));
let fm2 = Arc::new(FieldMatcher::with_match_id(2));
let (arena1, start1) = make_monocase_arena_fa(b"Foo", fm1.clone());
let (arena2, start2) = make_monocase_arena_fa(b"Bar", fm2.clone());
let (merged, merged_start) = merge_arena_nfas(&arena1, start1, &arena2, start2);
assert!(
matches_value(&merged, merged_start, b"foo"),
"Merged should match 'foo'"
);
assert!(
matches_value(&merged, merged_start, b"FOO"),
"Merged should match 'FOO'"
);
assert!(
matches_value(&merged, merged_start, b"bar"),
"Merged should match 'bar'"
);
assert!(
matches_value(&merged, merged_start, b"BAR"),
"Merged should match 'BAR'"
);
assert!(
!matches_value(&merged, merged_start, b"baz"),
"Merged should NOT match 'baz'"
);
}
#[test]
fn test_monocase_arena_fa_greek_sigma() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let pattern = "Σοφα".as_bytes();
let (arena, start) = make_monocase_arena_fa(pattern, fm.clone());
assert!(
matches_value(&arena, start, "Σοφα".as_bytes()),
"Original Greek should match"
);
assert!(
matches_value(&arena, start, "σοφα".as_bytes()),
"Lowercase sigma at start should match"
);
assert!(
matches_value(&arena, start, "ΣΟΦΑ".as_bytes()),
"All uppercase should match"
);
assert!(
matches_value(&arena, start, "σΟΦΑ".as_bytes()),
"Mixed case should match"
);
}
}
#[cfg(test)]
mod cidr_arena_tests {
use super::*;
use crate::json::CidrPattern;
fn matches_value(arena: &StateArena, start: StateId, value: &[u8]) -> bool {
let mut bufs = ArenaNfaBuffers::with_capacity();
traverse_arena_nfa(arena, start, value, &mut bufs);
!bufs.transitions.is_empty()
}
#[test]
fn test_cidr_arena_fa_ipv4_exact() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let cidr = CidrPattern::V4 {
network: [192, 168, 1, 1],
prefix_len: 32,
};
let (arena, start) = make_cidr_arena_fa(&cidr, fm.clone());
assert!(
matches_value(&arena, start, b"\"192.168.1.1\""),
"Should match exact IP"
);
assert!(
!matches_value(&arena, start, b"\"192.168.1.2\""),
"Should NOT match different IP"
);
}
#[test]
#[cfg_attr(miri, ignore)]
fn test_cidr_arena_fa_ipv4_24() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let cidr = CidrPattern::V4 {
network: [10, 0, 0, 0],
prefix_len: 24,
};
let (arena, start) = make_cidr_arena_fa(&cidr, fm.clone());
assert!(
matches_value(&arena, start, b"\"10.0.0.0\""),
"Should match 10.0.0.0"
);
assert!(
matches_value(&arena, start, b"\"10.0.0.1\""),
"Should match 10.0.0.1"
);
assert!(
matches_value(&arena, start, b"\"10.0.0.255\""),
"Should match 10.0.0.255"
);
assert!(
!matches_value(&arena, start, b"\"10.0.1.0\""),
"Should NOT match 10.0.1.0"
);
assert!(
!matches_value(&arena, start, b"\"192.168.1.1\""),
"Should NOT match 192.168.1.1"
);
}
#[test]
fn test_cidr_arena_fa_ipv4_range() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let cidr = CidrPattern::V4 {
network: [172, 16, 0, 0],
prefix_len: 30,
};
let (arena, start) = make_cidr_arena_fa(&cidr, fm.clone());
assert!(
matches_value(&arena, start, b"\"172.16.0.0\""),
"Should match 172.16.0.0"
);
assert!(
matches_value(&arena, start, b"\"172.16.0.1\""),
"Should match 172.16.0.1"
);
assert!(
matches_value(&arena, start, b"\"172.16.0.2\""),
"Should match 172.16.0.2"
);
assert!(
matches_value(&arena, start, b"\"172.16.0.3\""),
"Should match 172.16.0.3"
);
assert!(
!matches_value(&arena, start, b"\"172.16.0.4\""),
"Should NOT match 172.16.0.4"
);
}
#[test]
fn test_cidr_arena_fa_merge() {
let fm1 = Arc::new(FieldMatcher::with_match_id(1));
let fm2 = Arc::new(FieldMatcher::with_match_id(2));
let cidr1 = CidrPattern::V4 {
network: [10, 0, 0, 0],
prefix_len: 32,
};
let cidr2 = CidrPattern::V4 {
network: [192, 168, 0, 0],
prefix_len: 32,
};
let (arena1, start1) = make_cidr_arena_fa(&cidr1, fm1.clone());
let (arena2, start2) = make_cidr_arena_fa(&cidr2, fm2.clone());
let (merged, merged_start) = merge_arena_nfas(&arena1, start1, &arena2, start2);
assert!(
matches_value(&merged, merged_start, b"\"10.0.0.0\""),
"Merged should match 10.0.0.0"
);
assert!(
matches_value(&merged, merged_start, b"\"192.168.0.0\""),
"Merged should match 192.168.0.0"
);
assert!(
!matches_value(&merged, merged_start, b"\"172.16.0.0\""),
"Merged should NOT match 172.16.0.0"
);
}
#[test]
fn test_cidr_arena_fa_ipv6_basic() {
let fm = Arc::new(FieldMatcher::with_match_id(1));
let cidr = CidrPattern::V6 {
network: [0x20, 0x01, 0x0d, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
prefix_len: 32,
};
let (arena, start) = make_cidr_arena_fa(&cidr, fm.clone());
assert!(
matches_value(&arena, start, b"\"2001:db8:0:0:0:0:0:1\""),
"Should match 2001:db8:0:0:0:0:0:1"
);
assert!(
matches_value(&arena, start, b"\"2001:db8:ffff:ffff:ffff:ffff:ffff:ffff\""),
"Should match end of range"
);
assert!(
!matches_value(&arena, start, b"\"2001:db9:0:0:0:0:0:1\""),
"Should NOT match 2001:db9:..."
);
}
}
#[cfg(kani)]
mod kani_arena_proofs {
use super::*;
#[kani::proof]
#[kani::unwind(4)]
fn arena_dstep_symbolic_lookup() {
let c0: u8 = kani::any();
let c1: u8 = kani::any();
let c2: u8 = kani::any();
kani::assume(c0 > 0);
kani::assume(c1 > c0);
kani::assume(c2 > c1);
kani::assume((c2 as usize) <= BYTE_CEILING);
let s0 = StateId::from_index(kani::any::<u8>() as usize);
let s1 = StateId::from_index(kani::any::<u8>() as usize);
let s2 = StateId::from_index(kani::any::<u8>() as usize);
let mut table = ArenaSmallTable::new();
table.ceilings = smallvec![c0, c1, c2];
table.steps = smallvec![s0, s1, s2];
let byte: u8 = kani::any();
kani::assume((byte as usize) < BYTE_CEILING);
let result = table.dstep(byte);
if byte < c0 {
kani::assert(result == s0, "byte in first range must return s0");
} else if byte < c1 {
kani::assert(result == s1, "byte in second range must return s1");
} else if byte < c2 {
kani::assert(result == s2, "byte in third range must return s2");
} else {
kani::assert(
result == StateId::NONE,
"byte past last ceiling must return NONE",
);
}
}
}