use regex_automata::{
dfa::{Automaton, StartError},
util::{escape::DebugByte, prefilter::Prefilter, primitives::StateID, start},
Anchored, HalfMatch, MatchError,
};
use crate::{cursor::Cursor, engines::dfa::accel, literal, util::empty, Input};
#[inline]
pub fn try_search_fwd<C: Cursor, A: Automaton>(
dfa: &A,
input: &mut Input<C>,
) -> Result<Option<HalfMatch>, MatchError> {
let utf8empty = dfa.has_empty() && dfa.is_utf8();
let hm = match find_fwd(dfa, input)? {
None => return Ok(None),
Some(hm) if !utf8empty => return Ok(Some(hm)),
Some(hm) => hm,
};
empty::skip_splits_fwd(input, hm, hm.offset(), |input| {
let got = find_fwd(dfa, input)?;
Ok(got.map(|hm| (hm, hm.offset())))
})
}
#[inline]
pub fn try_search_rev<C: Cursor, A: Automaton>(
dfa: &A,
input: &mut Input<C>,
) -> Result<Option<HalfMatch>, MatchError> {
let utf8empty = dfa.has_empty() && dfa.is_utf8();
let hm = match find_rev(dfa, input)? {
None => return Ok(None),
Some(hm) if !utf8empty => return Ok(Some(hm)),
Some(hm) => hm,
};
empty::skip_splits_rev(input, hm, hm.offset(), |input| {
let got = find_rev(dfa, input)?;
Ok(got.map(|hm| (hm, hm.offset())))
})
}
#[inline(never)]
pub fn find_fwd<A: Automaton + ?Sized, C: Cursor>(
dfa: &A,
input: &mut Input<C>,
) -> Result<Option<HalfMatch>, MatchError> {
input.move_to(input.start());
if input.is_done() {
return Ok(None);
}
let pre = if input.get_anchored().is_anchored() { None } else { dfa.get_prefilter() };
if pre.is_some() {
if input.get_earliest() {
find_fwd_imp(dfa, input, pre, true)
} else {
find_fwd_imp(dfa, input, pre, false)
}
} else if input.get_earliest() {
find_fwd_imp(dfa, input, None, true)
} else {
find_fwd_imp(dfa, input, None, false)
}
}
#[cfg_attr(feature = "perf-inline", inline(always))]
fn find_fwd_imp<A: Automaton + ?Sized, C: Cursor>(
dfa: &A,
input: &mut Input<C>,
pre: Option<&'_ Prefilter>,
earliest: bool,
) -> Result<Option<HalfMatch>, MatchError> {
let universal_start = dfa.universal_start_state(Anchored::No).is_some();
let mut mat = None;
let mut sid = init_fwd(dfa, input)?;
if let Some(pre) = pre {
match literal::find(pre, input) {
None => return Ok(mat),
Some(ref span) => {
input.move_to(span.start);
if !universal_start {
sid = prefilter_restart(dfa, input)?;
}
}
}
}
macro_rules! next_unchecked {
($sid:expr) => {{
debug_assert!(input.chunk_pos() < input.chunk().len());
let byte = *input.chunk().get_unchecked(input.chunk_pos());
dfa.next_state_unchecked($sid, byte)
}};
}
'outer: loop {
let mut prev_sid;
loop {
if input.at() >= input.end()
|| input.chunk_pos() >= input.chunk().len() && !input.advance()
{
break 'outer;
}
prev_sid = unsafe { next_unchecked!(sid) };
if dfa.is_special_state(prev_sid) || input.at() + 3 >= input.end() {
core::mem::swap(&mut prev_sid, &mut sid);
break;
}
input.chunk_pos += 1;
if input.chunk_pos + 3 >= input.chunk().len() {
core::mem::swap(&mut prev_sid, &mut sid);
continue;
}
sid = unsafe { next_unchecked!(prev_sid) };
if dfa.is_special_state(sid) {
break;
}
input.chunk_pos += 1;
prev_sid = unsafe { next_unchecked!(sid) };
if dfa.is_special_state(prev_sid) {
core::mem::swap(&mut prev_sid, &mut sid);
break;
}
input.chunk_pos += 1;
sid = unsafe { next_unchecked!(prev_sid) };
if dfa.is_special_state(sid) {
break;
}
input.chunk_pos += 1;
}
if dfa.is_special_state(sid) {
if dfa.is_start_state(sid) {
if let Some(pre) = pre {
let old_pos = input.at();
match literal::find(pre, input) {
None => return Ok(mat),
Some(ref span) => {
if span.start > old_pos {
input.move_to(span.start);
if !universal_start {
sid = prefilter_restart(dfa, input)?;
}
continue;
} else if input.at() != old_pos {
input.move_to(old_pos);
}
}
}
} else if dfa.is_accel_state(sid) {
let needles = dfa.accelerator(sid);
input.chunk_pos = accel::find_fwd(needles, input, input.chunk_pos + 1)
.unwrap_or_else(|| input.chunk().len());
continue;
}
} else if dfa.is_match_state(sid) {
let pattern = dfa.match_pattern(sid, 0);
mat = Some(HalfMatch::new(pattern, input.at()));
if earliest {
return Ok(mat);
}
if dfa.is_accel_state(sid) {
let needles = dfa.accelerator(sid);
input.chunk_pos = accel::find_fwd(needles, input, input.chunk_pos + 1)
.unwrap_or_else(|| input.chunk().len());
continue;
}
} else if dfa.is_accel_state(sid) {
let needles = dfa.accelerator(sid);
input.chunk_pos = accel::find_fwd(needles, input, input.chunk_pos + 1)
.unwrap_or_else(|| input.chunk().len());
continue;
} else if dfa.is_dead_state(sid) {
return Ok(mat);
} else {
debug_assert!(dfa.is_quit_state(sid));
return Err(MatchError::quit(input.chunk()[input.chunk_pos], input.at()));
}
}
input.chunk_pos += 1;
}
eoi_fwd(dfa, input, &mut sid, &mut mat)?;
Ok(mat)
}
#[inline(never)]
pub fn find_rev<A: Automaton + ?Sized, C: Cursor>(
dfa: &A,
input: &mut Input<C>,
) -> Result<Option<HalfMatch>, MatchError> {
input.move_to(input.end());
if input.is_done() {
return Ok(None);
}
if input.get_earliest() {
find_rev_imp(dfa, input, true)
} else {
find_rev_imp(dfa, input, false)
}
}
#[cfg_attr(feature = "perf-inline", inline(always))]
fn find_rev_imp<A: Automaton + ?Sized, C: Cursor>(
dfa: &A,
input: &mut Input<C>,
earliest: bool,
) -> Result<Option<HalfMatch>, MatchError> {
let mut mat = None;
let mut sid = init_rev(dfa, input)?;
if input.start() == input.end() || input.chunk_pos == 0 && !input.backtrack() {
eoi_rev(dfa, input, &mut sid, &mut mat)?;
return Ok(mat);
}
input.chunk_pos -= 1;
macro_rules! next_unchecked {
($sid:expr) => {{
let byte = *input.chunk().get_unchecked(input.chunk_pos);
dfa.next_state_unchecked($sid, byte)
}};
}
#[rustfmt::skip]
macro_rules! ensure_chunk {
() => {
if input.chunk_pos == 0 && !input.backtrack() {
break;
}
};
}
loop {
let mut prev_sid;
while input.at() >= input.start() {
prev_sid = unsafe { next_unchecked!(sid) };
if dfa.is_special_state(prev_sid) || input.at() <= input.start().saturating_add(3) {
core::mem::swap(&mut prev_sid, &mut sid);
break;
}
ensure_chunk!();
input.chunk_pos -= 1;
if input.chunk_pos <= 2 {
core::mem::swap(&mut prev_sid, &mut sid);
continue;
}
sid = unsafe { next_unchecked!(prev_sid) };
if dfa.is_special_state(sid) {
break;
}
input.chunk_pos -= 1;
prev_sid = unsafe { next_unchecked!(sid) };
if dfa.is_special_state(prev_sid) {
core::mem::swap(&mut prev_sid, &mut sid);
break;
}
input.chunk_pos -= 1;
sid = unsafe { next_unchecked!(prev_sid) };
if dfa.is_special_state(sid) {
break;
}
input.chunk_pos -= 1;
}
if dfa.is_special_state(sid) {
if dfa.is_start_state(sid) {
if dfa.is_accel_state(sid) {
let needles = dfa.accelerator(sid);
input.chunk_pos = accel::find_rev(needles, input, input.chunk_pos)
.map(|i| i + 1)
.unwrap_or(0);
}
} else if dfa.is_match_state(sid) {
let pattern = dfa.match_pattern(sid, 0);
mat = Some(HalfMatch::new(pattern, input.at() + 1));
if earliest {
return Ok(mat);
}
if dfa.is_accel_state(sid) {
let needles = dfa.accelerator(sid);
input.chunk_pos = accel::find_rev(needles, input, input.chunk_pos)
.map(|i| i + 1)
.unwrap_or(0);
}
} else if dfa.is_accel_state(sid) {
let needles = dfa.accelerator(sid);
input.chunk_pos =
accel::find_rev(needles, input, input.chunk_pos).map(|i| i + 1).unwrap_or(0);
} else if dfa.is_dead_state(sid) {
return Ok(mat);
} else {
debug_assert!(dfa.is_quit_state(sid));
return Err(MatchError::quit(input.chunk()[input.chunk_pos], input.at()));
}
}
if input.at() <= input.start() {
break;
}
ensure_chunk!();
input.chunk_pos -= 1;
}
eoi_rev(dfa, input, &mut sid, &mut mat)?;
Ok(mat)
}
#[cfg_attr(feature = "perf-inline", inline(always))]
fn init_fwd<A: Automaton + ?Sized, C: Cursor>(
dfa: &A,
input: &mut Input<C>,
) -> Result<StateID, MatchError> {
let look_behind = input.ensure_look_behind();
let start_config = start::Config::new().look_behind(look_behind).anchored(input.get_anchored());
dfa.start_state(&start_config).map_err(|err| match err {
StartError::Quit { byte } => {
let offset = input.at().checked_sub(1).expect("no quit in start without look-behind");
MatchError::quit(byte, offset)
}
StartError::UnsupportedAnchored { mode } => MatchError::unsupported_anchored(mode),
_ => panic!("damm forward compatability"),
})
}
#[cfg_attr(feature = "perf-inline", inline(always))]
fn init_rev<A: Automaton + ?Sized, C: Cursor>(
dfa: &A,
input: &mut Input<C>,
) -> Result<StateID, MatchError> {
let chunk_pos = input.chunk_pos();
let mut look_ahead = input.chunk().get(chunk_pos).copied();
if chunk_pos + input.chunk_offset() == input.slice_span.end {
look_ahead = None
} else if look_ahead.is_none() && input.advance() {
look_ahead = input.chunk().first().copied();
input.backtrack();
}
let start_config = start::Config::new().look_behind(look_ahead).anchored(input.get_anchored());
dfa.start_state(&start_config).map_err(|err| match err {
StartError::Quit { byte } => {
let offset =
input.start().checked_sub(1).expect("no quit in start without look-behind");
MatchError::quit(byte, offset)
}
StartError::UnsupportedAnchored { mode } => MatchError::unsupported_anchored(mode),
_ => panic!("damm forward compatability"),
})
}
#[cfg_attr(feature = "perf-inline", inline(always))]
fn eoi_fwd<A: Automaton + ?Sized, C: Cursor>(
dfa: &A,
input: &mut Input<C>,
sid: &mut StateID,
mat: &mut Option<HalfMatch>,
) -> Result<(), MatchError> {
let sp = input.get_span();
input.move_to(sp.end);
match input.chunk().get(sp.end - input.chunk_offset()) {
Some(&b) if sp.end != input.slice_span.end => {
*sid = dfa.next_state(*sid, b);
if dfa.is_match_state(*sid) {
let pattern = dfa.match_pattern(*sid, 0);
*mat = Some(HalfMatch::new(pattern, sp.end));
} else if dfa.is_quit_state(*sid) {
return Err(MatchError::quit(b, sp.end));
}
}
_ => {
*sid = dfa.next_eoi_state(*sid);
if dfa.is_match_state(*sid) {
let pattern = dfa.match_pattern(*sid, 0);
*mat = Some(HalfMatch::new(pattern, sp.end));
}
debug_assert!(!dfa.is_quit_state(*sid));
}
}
Ok(())
}
#[cfg_attr(feature = "perf-inline", inline(always))]
fn eoi_rev<A: Automaton + ?Sized, C: Cursor>(
dfa: &A,
input: &mut Input<C>,
sid: &mut StateID,
mat: &mut Option<HalfMatch>,
) -> Result<(), MatchError> {
let sp = input.get_span();
if sp.start > input.slice_span.start {
input.move_to(input.start() - 1);
let byte = input.chunk()[sp.start - input.chunk_offset() - 1];
*sid = dfa.next_state(*sid, byte);
if dfa.is_match_state(*sid) {
let pattern = dfa.match_pattern(*sid, 0);
*mat = Some(HalfMatch::new(pattern, sp.start));
} else if dfa.is_quit_state(*sid) {
return Err(MatchError::quit(byte, sp.start - 1));
}
} else {
*sid = dfa.next_eoi_state(*sid);
if dfa.is_match_state(*sid) {
let pattern = dfa.match_pattern(*sid, 0);
*mat = Some(HalfMatch::new(pattern, 0));
}
debug_assert!(!dfa.is_quit_state(*sid));
}
Ok(())
}
#[cfg_attr(feature = "perf-inline", inline(always))]
fn prefilter_restart<A: Automaton + ?Sized, C: Cursor>(
dfa: &A,
input: &mut Input<C>,
) -> Result<StateID, MatchError> {
init_fwd(dfa, input)
}