use crate::hir::CodepointClass;
use crate::nfa::{ByteClass, StateId};
pub const MAX_THREADS: usize = 256;
pub struct ThreadWorklist {
pub count: usize,
pub stride: usize,
pub states: Vec<u32>,
pub flags: Vec<u32>,
pub captures: Vec<i64>,
pub visited: Vec<u64>,
}
impl ThreadWorklist {
pub fn new(capture_count: u32, state_count: usize) -> Self {
let stride = (capture_count as usize + 1) * 2;
let bitmap_words = state_count.max(1).div_ceil(64);
Self {
count: 0,
stride,
states: vec![0; MAX_THREADS],
flags: vec![0; MAX_THREADS],
captures: vec![-1i64; MAX_THREADS * stride],
visited: vec![0; bitmap_words],
}
}
#[inline]
pub fn clear(&mut self) {
self.count = 0;
for word in &mut self.visited {
*word = 0;
}
for slot in &mut self.captures {
*slot = -1;
}
}
#[inline]
pub fn is_visited(&self, state: StateId) -> bool {
let idx = state as usize;
let word = idx / 64;
let bit = idx % 64;
if word >= self.visited.len() {
return false; }
(self.visited[word] & (1u64 << bit)) != 0
}
#[inline]
pub fn mark_visited(&mut self, state: StateId) {
let idx = state as usize;
let word = idx / 64;
let bit = idx % 64;
if word < self.visited.len() {
self.visited[word] |= 1u64 << bit;
}
}
#[inline]
pub fn add_thread(&mut self, state: StateId, flags: u32) -> Option<usize> {
if self.count >= MAX_THREADS {
return None;
}
let idx = self.count;
self.states[idx] = state;
self.flags[idx] = flags;
self.count += 1;
Some(idx)
}
#[inline]
pub fn get_capture(&self, thread_idx: usize, slot: usize) -> i64 {
self.captures[thread_idx * self.stride + slot]
}
#[inline]
pub fn set_capture(&mut self, thread_idx: usize, slot: usize, value: i64) {
self.captures[thread_idx * self.stride + slot] = value;
}
#[inline]
#[allow(dead_code)]
pub fn copy_captures(&mut self, from_idx: usize, to_idx: usize) {
let from_start = from_idx * self.stride;
let to_start = to_idx * self.stride;
for i in 0..self.stride {
self.captures[to_start + i] = self.captures[from_start + i];
}
}
#[inline]
#[allow(dead_code)]
pub fn stride(&self) -> usize {
self.stride
}
}
#[repr(C)]
pub struct LookaroundCache {
pub count: usize,
pub max_len: usize,
pub results: Vec<u64>,
pub computed: Vec<u64>,
}
impl LookaroundCache {
pub fn new(lookaround_count: usize, max_input_len: usize) -> Self {
let words_needed = max_input_len.div_ceil(64);
let total_words = lookaround_count * words_needed;
Self {
count: lookaround_count,
max_len: max_input_len,
results: vec![0; total_words],
computed: vec![0; total_words],
}
}
#[inline]
#[allow(dead_code)]
pub fn is_computed(&self, lookaround_id: usize, pos: usize) -> bool {
if lookaround_id >= self.count || pos >= self.max_len {
return false;
}
let words_per_la = self.max_len.div_ceil(64);
let word_idx = lookaround_id * words_per_la + pos / 64;
let bit = pos % 64;
(self.computed[word_idx] & (1u64 << bit)) != 0
}
#[inline]
#[allow(dead_code)]
pub fn get_result(&self, lookaround_id: usize, pos: usize) -> bool {
if lookaround_id >= self.count || pos >= self.max_len {
return false;
}
let words_per_la = self.max_len.div_ceil(64);
let word_idx = lookaround_id * words_per_la + pos / 64;
let bit = pos % 64;
(self.results[word_idx] & (1u64 << bit)) != 0
}
#[inline]
#[allow(dead_code)]
pub fn set_result(&mut self, lookaround_id: usize, pos: usize, result: bool) {
if lookaround_id >= self.count || pos >= self.max_len {
return;
}
let words_per_la = self.max_len.div_ceil(64);
let word_idx = lookaround_id * words_per_la + pos / 64;
let bit = pos % 64;
self.computed[word_idx] |= 1u64 << bit;
if result {
self.results[word_idx] |= 1u64 << bit;
}
}
#[allow(dead_code)]
pub fn clear(&mut self) {
for word in &mut self.results {
*word = 0;
}
for word in &mut self.computed {
*word = 0;
}
}
}
#[allow(dead_code)]
pub struct TaggedNfaContext {
pub current: ThreadWorklist,
pub next: ThreadWorklist,
pub best_captures: Vec<i64>,
pub best_end: i64,
pub best_priority: u32,
pub lookaround_cache: LookaroundCache,
pub stride: usize,
}
#[allow(dead_code)]
impl TaggedNfaContext {
pub fn new(
capture_count: u32,
state_count: usize,
lookaround_count: usize,
max_input_len: usize,
) -> Self {
let stride = (capture_count as usize + 1) * 2;
Self {
current: ThreadWorklist::new(capture_count, state_count),
next: ThreadWorklist::new(capture_count, state_count),
best_captures: vec![-1; stride],
best_end: -1,
best_priority: 0,
lookaround_cache: LookaroundCache::new(lookaround_count, max_input_len),
stride,
}
}
pub fn reset(&mut self) {
self.current.clear();
self.next.clear();
for slot in &mut self.best_captures {
*slot = -1;
}
self.best_end = -1;
self.best_priority = 0;
self.lookaround_cache.clear();
}
#[inline]
pub fn swap_worklists(&mut self) {
std::mem::swap(&mut self.current, &mut self.next);
self.next.clear();
}
#[inline]
pub fn get_best_capture(&self, group: usize) -> Option<(usize, usize)> {
let start_idx = group * 2;
let end_idx = group * 2 + 1;
if start_idx < self.best_captures.len() && end_idx < self.best_captures.len() {
let start = self.best_captures[start_idx];
let end = self.best_captures[end_idx];
if start >= 0 && end >= 0 {
return Some((start as usize, end as usize));
}
}
None
}
}
#[derive(Debug, Clone)]
pub enum PatternStep {
Byte(u8),
ByteClass(ByteClass),
GreedyPlus(ByteClass),
#[allow(dead_code)]
GreedyStar(ByteClass),
GreedyPlusLookahead(ByteClass, Vec<PatternStep>, bool),
#[allow(dead_code)]
GreedyStarLookahead(ByteClass, Vec<PatternStep>, bool),
NonGreedyPlus(ByteClass, Box<PatternStep>),
NonGreedyStar(ByteClass, Box<PatternStep>),
Alt(Vec<Vec<PatternStep>>),
CaptureStart(u32),
CaptureEnd(u32),
#[allow(dead_code)]
CodepointClass(CodepointClass, StateId),
GreedyCodepointPlus(CodepointClass),
WordBoundary,
NotWordBoundary,
PositiveLookahead(Vec<PatternStep>),
NegativeLookahead(Vec<PatternStep>),
PositiveLookbehind(Vec<PatternStep>, usize),
NegativeLookbehind(Vec<PatternStep>, usize),
Backref(u32),
StartOfText,
EndOfText,
StartOfLine,
EndOfLine,
}
#[inline]
pub fn is_word_char(b: u8) -> bool {
b.is_ascii_alphanumeric() || b == b'_'
}