use log::debug;
use regex_automata::nfa::thompson::NFA;
use regex_automata::util::prefilter::Prefilter;
use regex_automata::util::primitives::NonMaxUsize;
use regex_automata::{dfa, hybrid, HalfMatch, Match, MatchKind, PatternID};
use crate::cursor::Cursor;
use crate::engines::meta::error::{BuildError, RetryFailError};
use crate::engines::meta::regex::RegexInfo;
use crate::engines::pikevm;
use crate::Input;
/// Newtype around [`PikeVMEngine`]; unlike the DFA wrappers in this file the
/// inner engine is always present.
#[derive(Debug)]
pub(crate) struct PikeVM(PikeVMEngine);

impl PikeVM {
    /// Builds the wrapped engine from `nfa`.
    ///
    /// # Errors
    ///
    /// Returns whatever [`BuildError`] `PikeVMEngine::new` reports.
    pub(crate) fn new(
        info: &RegexInfo,
        pre: Option<Prefilter>,
        nfa: &NFA,
    ) -> Result<PikeVM, BuildError> {
        let engine = PikeVMEngine::new(info, pre, nfa)?;
        Ok(PikeVM(engine))
    }

    /// Creates a fresh cache sized for this engine.
    pub(crate) fn create_cache(&self) -> PikeVMCache {
        PikeVMCache::new(self)
    }

    /// Borrows the wrapped engine.
    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn get(&self) -> &PikeVMEngine {
        let PikeVM(engine) = self;
        engine
    }
}
/// Owns the crate's `pikevm::PikeVM` and exposes the search entry points
/// used through the [`PikeVM`] wrapper.
#[derive(Debug)]
pub(crate) struct PikeVMEngine(pikevm::PikeVM);

impl PikeVMEngine {
    /// Configures a PikeVM with the match kind from `info` and the optional
    /// prefilter `pre`, then builds it from a clone of `nfa`.
    ///
    /// # Errors
    ///
    /// A failure reported by the builder is wrapped with `BuildError::nfa`.
    pub(crate) fn new(
        info: &RegexInfo,
        pre: Option<Prefilter>,
        nfa: &NFA,
    ) -> Result<PikeVMEngine, BuildError> {
        let match_kind = info.config().get_match_kind();
        let config = pikevm::Config::new().match_kind(match_kind).prefilter(pre);
        let built = pikevm::Builder::new().configure(config).build_from_nfa(nfa.clone());
        match built {
            Ok(vm) => {
                debug!("PikeVM built");
                Ok(PikeVMEngine(vm))
            }
            Err(err) => Err(BuildError::nfa(err)),
        }
    }

    /// Reports whether `input` contains a match.
    ///
    /// # Panics
    ///
    /// Panics if `cache` holds no inner cache (e.g. it came from
    /// `PikeVMCache::none`).
    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn is_match(&self, cache: &mut PikeVMCache, input: &mut Input<impl Cursor>) -> bool {
        let vm_cache = cache.0.as_mut().unwrap();
        pikevm::is_match(&self.0, vm_cache, input)
    }

    /// Runs a search that fills `slots` and returns the matching pattern, if
    /// any.
    ///
    /// # Panics
    ///
    /// Panics if `cache` holds no inner cache (e.g. it came from
    /// `PikeVMCache::none`).
    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn search_slots(
        &self,
        cache: &mut PikeVMCache,
        input: &mut Input<impl Cursor>,
        slots: &mut [Option<NonMaxUsize>],
    ) -> Option<PatternID> {
        let vm_cache = cache.0.as_mut().unwrap();
        pikevm::search_slots(&self.0, vm_cache, input, slots)
    }
}
/// Optional scratch space for a [`PikeVM`]; `None` means no cache has been
/// allocated.
#[derive(Clone, Debug)]
pub(crate) struct PikeVMCache(Option<pikevm::Cache>);

impl PikeVMCache {
    /// Creates a placeholder that holds no cache.
    pub(crate) fn none() -> PikeVMCache {
        PikeVMCache(None)
    }

    /// Allocates a cache for the engine wrapped by `builder`.
    pub(crate) fn new(builder: &PikeVM) -> PikeVMCache {
        let engine = builder.get();
        let cache = pikevm::Cache::new(&engine.0);
        PikeVMCache(Some(cache))
    }

    /// Resets the cache against the engine wrapped by `builder`.
    ///
    /// # Panics
    ///
    /// Panics if this value holds no cache.
    pub(crate) fn reset(&mut self, builder: &PikeVM) {
        let cache = self.0.as_mut().unwrap();
        cache.reset(&builder.get().0);
    }

    /// Memory usage reported by the inner cache, or `0` when there is none.
    pub(crate) fn memory_usage(&self) -> usize {
        match self.0 {
            Some(ref cache) => cache.memory_usage(),
            None => 0,
        }
    }
}
/// Optional lazy-DFA engine; `None` when it is disabled or could not be
/// built.
#[derive(Debug)]
pub(crate) struct Hybrid(Option<HybridEngine>);

impl Hybrid {
    /// Creates a wrapper with no engine.
    pub(crate) fn none() -> Hybrid {
        Hybrid(None)
    }

    /// Attempts to build the engine; the wrapper is empty whenever
    /// `HybridEngine::new` returns `None`.
    pub(crate) fn new(info: &RegexInfo, pre: Option<Prefilter>, nfa: &NFA, nfarev: &NFA) -> Hybrid {
        let engine = HybridEngine::new(info, pre, nfa, nfarev);
        Hybrid(engine)
    }

    /// Creates a cache matching this wrapper (empty if there is no engine).
    pub(crate) fn create_cache(&self) -> HybridCache {
        HybridCache::new(self)
    }

    /// Borrows the engine if one exists. `_input` is currently unused.
    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn get(&self, _input: &mut Input<impl Cursor>) -> Option<&HybridEngine> {
        self.0.as_ref()
    }

    /// Reports whether an engine is available.
    pub(crate) fn is_some(&self) -> bool {
        matches!(self.0, Some(_))
    }
}
/// A forward/reverse pair of lazy DFAs bundled as a `hybrid::regex::Regex`.
#[derive(Debug)]
pub(crate) struct HybridEngine(hybrid::regex::Regex);

impl HybridEngine {
    /// Builds the forward lazy DFA from `nfa` and the reverse one from
    /// `nfarev`.
    ///
    /// Returns `None` when the hybrid engine is disabled in the config or
    /// when either DFA fails to build (the failure is only logged at debug
    /// level).
    pub(crate) fn new(
        info: &RegexInfo,
        pre: Option<Prefilter>,
        nfa: &NFA,
        nfarev: &NFA,
    ) -> Option<HybridEngine> {
        let config = info.config();
        if !config.get_hybrid() {
            return None;
        }

        // Start states are only specialized when a prefilter is present.
        let has_prefilter = pre.is_some();
        let base_config = hybrid::dfa::Config::new()
            .match_kind(config.get_match_kind())
            .prefilter(pre)
            .starts_for_each_pattern(true)
            .byte_classes(config.get_byte_classes())
            .unicode_word_boundary(true)
            .specialize_start_states(has_prefilter)
            .cache_capacity(config.get_hybrid_cache_capacity())
            .skip_cache_capacity_check(false)
            .minimum_cache_clear_count(Some(3))
            .minimum_bytes_per_state(Some(10));

        let fwd = hybrid::dfa::Builder::new()
            .configure(base_config.clone())
            .build_from_nfa(nfa.clone())
            .map_err(|err| debug!("forward lazy DFA failed to build: {}", err))
            .ok()?;

        // The reverse DFA shares the base settings but uses all-match
        // semantics, no prefilter and unspecialized start states.
        let rev_config = base_config
            .match_kind(MatchKind::All)
            .prefilter(None)
            .specialize_start_states(false);
        let rev = hybrid::dfa::Builder::new()
            .configure(rev_config)
            .build_from_nfa(nfarev.clone())
            .map_err(|err| debug!("reverse lazy DFA failed to build: {}", err))
            .ok()?;

        let regex = hybrid::regex::Builder::new().build_from_dfas(fwd, rev);
        debug!("lazy DFA built");
        Some(HybridEngine(regex))
    }

    /// Runs a full search with the paired DFAs.
    ///
    /// # Errors
    ///
    /// A search error is converted into [`RetryFailError`].
    ///
    /// # Panics
    ///
    /// Panics if `cache` holds no inner cache.
    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn try_search(
        &self,
        cache: &mut HybridCache,
        input: &mut Input<impl Cursor>,
    ) -> Result<Option<Match>, RetryFailError> {
        let regex_cache = cache.0.as_mut().unwrap();
        let outcome = crate::engines::hybrid::try_search(&self.0, regex_cache, input);
        outcome.map_err(Into::into)
    }

    /// Runs a forward half-search using the forward DFA and its half of the
    /// cache.
    ///
    /// # Errors
    ///
    /// A search error is converted into [`RetryFailError`].
    ///
    /// # Panics
    ///
    /// Panics if `cache` holds no inner cache.
    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn try_search_half_fwd(
        &self,
        cache: &mut HybridCache,
        input: &mut Input<impl Cursor>,
    ) -> Result<Option<HalfMatch>, RetryFailError> {
        let (fwd_cache, _) = cache.0.as_mut().unwrap().as_parts_mut();
        let outcome = crate::engines::hybrid::try_search_fwd(self.0.forward(), fwd_cache, input);
        outcome.map_err(Into::into)
    }

    /// Runs a reverse half-search using the reverse DFA and its half of the
    /// cache.
    ///
    /// # Errors
    ///
    /// A search error is converted into [`RetryFailError`].
    ///
    /// # Panics
    ///
    /// Panics if `cache` holds no inner cache.
    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn try_search_half_rev(
        &self,
        cache: &mut HybridCache,
        input: &mut Input<impl Cursor>,
    ) -> Result<Option<HalfMatch>, RetryFailError> {
        let (_, rev_cache) = cache.0.as_mut().unwrap().as_parts_mut();
        let outcome = crate::engines::hybrid::try_search_rev(self.0.reverse(), rev_cache, input);
        outcome.map_err(Into::into)
    }
}
/// Optional cache for a [`Hybrid`] engine; `None` when there is no engine to
/// cache for.
#[derive(Clone, Debug)]
pub(crate) struct HybridCache(Option<hybrid::regex::Cache>);

impl HybridCache {
    /// Creates a placeholder that holds no cache.
    pub(crate) fn none() -> HybridCache {
        HybridCache(None)
    }

    /// Creates a cache for `builder`'s engine, or an empty one when
    /// `builder` has no engine.
    pub(crate) fn new(builder: &Hybrid) -> HybridCache {
        let cache = match builder.0 {
            Some(ref engine) => Some(engine.0.create_cache()),
            None => None,
        };
        HybridCache(cache)
    }

    /// Resets the cache against `builder`'s engine; does nothing when
    /// `builder` has no engine.
    ///
    /// # Panics
    ///
    /// Panics if `builder` has an engine but this value holds no cache.
    pub(crate) fn reset(&mut self, builder: &Hybrid) {
        if let Some(engine) = builder.0.as_ref() {
            let cache = self.0.as_mut().unwrap();
            cache.reset(&engine.0);
        }
    }

    /// Memory usage reported by the inner cache, or `0` when there is none.
    pub(crate) fn memory_usage(&self) -> usize {
        match self.0 {
            Some(ref cache) => cache.memory_usage(),
            None => 0,
        }
    }
}
/// Optional fully compiled DFA engine; `None` when it is disabled or could
/// not be built.
#[derive(Debug)]
pub(crate) struct DFA(Option<DFAEngine>);

impl DFA {
    /// Creates a wrapper with no engine.
    pub(crate) fn none() -> DFA {
        DFA(None)
    }

    /// Attempts to build the engine; the wrapper is empty whenever
    /// `DFAEngine::new` returns `None`.
    pub(crate) fn new(info: &RegexInfo, pre: Option<Prefilter>, nfa: &NFA, nfarev: &NFA) -> DFA {
        let engine = DFAEngine::new(info, pre, nfa, nfarev);
        DFA(engine)
    }

    /// Borrows the engine if one exists. `_input` is currently unused.
    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn get(&self, _input: &mut Input<impl Cursor>) -> Option<&DFAEngine> {
        self.0.as_ref()
    }

    /// Reports whether an engine is available.
    pub(crate) fn is_some(&self) -> bool {
        matches!(self.0, Some(_))
    }

    /// Memory usage reported by the engine, or `0` when there is none.
    pub(crate) fn memory_usage(&self) -> usize {
        match self.0 {
            Some(ref engine) => engine.memory_usage(),
            None => 0,
        }
    }
}
/// A forward/reverse pair of dense DFAs bundled as a `dfa::regex::Regex`.
#[derive(Debug)]
pub(crate) struct DFAEngine(dfa::regex::Regex);

impl DFAEngine {
    /// Builds the forward dense DFA from `nfa` and the reverse one from
    /// `nfarev`.
    ///
    /// Returns `None` when the full DFA is disabled in the config, when the
    /// NFA has more states than the configured state limit, or when either
    /// DFA fails to build (the failure is only logged at debug level).
    pub(crate) fn new(
        info: &RegexInfo,
        pre: Option<Prefilter>,
        nfa: &NFA,
        nfarev: &NFA,
    ) -> Option<DFAEngine> {
        let config = info.config();
        if !config.get_dfa() {
            return None;
        }

        let nfa_state_count = nfa.states().len();
        match config.get_dfa_state_limit() {
            Some(state_limit) if nfa_state_count > state_limit => {
                debug!(
                    "skipping full DFA because NFA has {} states, \
                     which exceeds the heuristic limit of {}",
                    nfa_state_count, state_limit,
                );
                return None;
            }
            _ => {}
        }

        // A quarter of the configured limit is used for both the
        // determinization limit and the DFA size limit of each DFA.
        let size_limit = config.get_dfa_size_limit().map(|limit| limit / 4);
        // Start states are only specialized when a prefilter is present.
        let has_prefilter = pre.is_some();
        let base_config = dfa::dense::Config::new()
            .match_kind(config.get_match_kind())
            .prefilter(pre)
            .starts_for_each_pattern(true)
            .byte_classes(config.get_byte_classes())
            .unicode_word_boundary(true)
            .specialize_start_states(has_prefilter)
            .determinize_size_limit(size_limit)
            .dfa_size_limit(size_limit);

        let fwd = dfa::dense::Builder::new()
            .configure(base_config.clone())
            .build_from_nfa(nfa)
            .map_err(|err| debug!("forward full DFA failed to build: {}", err))
            .ok()?;

        // The reverse DFA is anchored-only, uses all-match semantics, and
        // has no prefilter or specialized start states.
        let rev_config = base_config
            .start_kind(dfa::StartKind::Anchored)
            .match_kind(MatchKind::All)
            .prefilter(None)
            .specialize_start_states(false);
        let rev = dfa::dense::Builder::new()
            .configure(rev_config)
            .build_from_nfa(nfarev)
            .map_err(|err| debug!("reverse full DFA failed to build: {}", err))
            .ok()?;

        let regex = dfa::regex::Builder::new().build_from_dfas(fwd, rev);
        debug!(
            "fully compiled forward and reverse DFAs built, {} bytes",
            regex.forward().memory_usage() + regex.reverse().memory_usage(),
        );
        Some(DFAEngine(regex))
    }

    /// Runs a full search with the paired DFAs.
    ///
    /// # Errors
    ///
    /// A search error is converted into [`RetryFailError`].
    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn try_search(
        &self,
        input: &mut Input<impl Cursor>,
    ) -> Result<Option<Match>, RetryFailError> {
        let outcome = crate::engines::dfa::try_search(&self.0, input);
        outcome.map_err(Into::into)
    }

    /// Runs a forward half-search using the forward DFA.
    ///
    /// # Errors
    ///
    /// A search error is converted into [`RetryFailError`].
    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn try_search_half_fwd(
        &self,
        input: &mut Input<impl Cursor>,
    ) -> Result<Option<HalfMatch>, RetryFailError> {
        let fwd = self.0.forward();
        crate::engines::dfa::try_search_fwd(fwd, input).map_err(Into::into)
    }

    /// Runs a reverse half-search using the reverse DFA.
    ///
    /// # Errors
    ///
    /// A search error is converted into [`RetryFailError`].
    #[cfg_attr(feature = "perf-inline", inline(always))]
    pub(crate) fn try_search_half_rev(
        &self,
        input: &mut Input<impl Cursor>,
    ) -> Result<Option<HalfMatch>, RetryFailError> {
        let rev = self.0.reverse();
        crate::engines::dfa::try_search_rev(rev, input).map_err(Into::into)
    }

    /// Combined memory usage of the forward and reverse DFAs.
    pub(crate) fn memory_usage(&self) -> usize {
        let fwd_bytes = self.0.forward().memory_usage();
        let rev_bytes = self.0.reverse().memory_usage();
        fwd_bytes + rev_bytes
    }
}