use regex_automata::hybrid::dfa::{Cache, DFA};
use regex_automata::hybrid::{LazyStateID, StartError};
use regex_automata::util::prefilter::Prefilter;
use regex_automata::util::start;
use regex_automata::{HalfMatch, MatchError};
use crate::cursor::Cursor;
use crate::input::Input;
use crate::literal;
use crate::util::empty::{skip_splits_fwd, skip_splits_rev};
#[inline]
pub fn try_search_fwd<C: Cursor>(
dfa: &DFA,
cache: &mut Cache,
input: &mut Input<C>,
) -> Result<Option<HalfMatch>, MatchError> {
let utf8empty = dfa.get_nfa().has_empty() && dfa.get_nfa().is_utf8();
let hm = match find_fwd(dfa, cache, input)? {
None => return Ok(None),
Some(hm) if !utf8empty => return Ok(Some(hm)),
Some(hm) => hm,
};
skip_splits_fwd(input, hm, hm.offset(), |input| {
let got = find_fwd(dfa, cache, input)?;
Ok(got.map(|hm| (hm, hm.offset())))
})
}
#[inline]
pub fn try_search_rev<C: Cursor>(
dfa: &DFA,
cache: &mut Cache,
input: &mut Input<C>,
) -> Result<Option<HalfMatch>, MatchError> {
let utf8empty = dfa.get_nfa().has_empty() && dfa.get_nfa().is_utf8();
let hm = match find_rev(dfa, cache, input)? {
None => return Ok(None),
Some(hm) if !utf8empty => return Ok(Some(hm)),
Some(hm) => hm,
};
skip_splits_rev(input, hm, hm.offset(), |input| {
let got = find_rev(dfa, cache, input)?;
Ok(got.map(|hm| (hm, hm.offset())))
})
}
#[inline(never)]
pub(crate) fn find_fwd<C: Cursor>(
dfa: &DFA,
cache: &mut Cache,
input: &mut Input<C>,
) -> Result<Option<HalfMatch>, MatchError> {
input.move_to(input.start());
if input.is_done() {
return Ok(None);
}
let pre =
if input.get_anchored().is_anchored() { None } else { dfa.get_config().get_prefilter() };
if pre.is_some() {
if input.get_earliest() {
find_fwd_imp(dfa, cache, input, pre, true)
} else {
find_fwd_imp(dfa, cache, input, pre, false)
}
} else if input.get_earliest() {
find_fwd_imp(dfa, cache, input, None, true)
} else {
find_fwd_imp(dfa, cache, input, None, false)
}
}
#[cfg_attr(feature = "perf-inline", inline(always))]
fn find_fwd_imp<C: Cursor>(
dfa: &DFA,
cache: &mut Cache,
input: &mut Input<C>,
pre: Option<&'_ Prefilter>,
earliest: bool,
) -> Result<Option<HalfMatch>, MatchError> {
let universal_start = dfa.get_nfa().look_set_prefix_any().is_empty();
let mut mat = None;
let mut sid = init_fwd(dfa, cache, input)?;
if let Some(pre) = pre {
match literal::find(pre, input) {
None => return Ok(mat),
Some(ref span) => {
input.move_to(span.start);
if !universal_start {
sid = prefilter_restart(dfa, cache, input)?;
}
}
}
}
macro_rules! next_unchecked {
($sid:expr) => {{
debug_assert!(input.chunk_pos() < input.chunk().len());
let byte = *input.chunk().get_unchecked(input.chunk_pos());
dfa.next_state_untagged_unchecked(cache, $sid, byte)
}};
}
macro_rules! ensure_chunk {
() => {{
if input.chunk_pos() >= input.chunk().len() && !input.advance() {
break;
}
}};
}
cache.search_start(input.at());
while input.at() < input.end() {
if sid.is_tagged() {
ensure_chunk!();
cache.search_update(input.at());
sid = dfa
.next_state(cache, sid, input.chunk()[input.chunk_pos])
.map_err(|_| gave_up(input.at()))?;
} else {
let mut prev_sid = sid;
while input.at() < input.end() {
ensure_chunk!();
prev_sid = unsafe { next_unchecked!(sid) };
if prev_sid.is_tagged() || input.at() + 3 >= input.end() {
core::mem::swap(&mut prev_sid, &mut sid);
break;
}
input.chunk_pos += 1;
if input.chunk_pos + 3 >= input.chunk().len() {
core::mem::swap(&mut prev_sid, &mut sid);
continue;
}
sid = unsafe { next_unchecked!(prev_sid) };
if sid.is_tagged() {
break;
}
input.chunk_pos += 1;
prev_sid = unsafe { next_unchecked!(sid) };
if prev_sid.is_tagged() {
core::mem::swap(&mut prev_sid, &mut sid);
break;
}
input.chunk_pos += 1;
sid = unsafe { next_unchecked!(prev_sid) };
if sid.is_tagged() {
break;
}
input.chunk_pos += 1;
}
if sid.is_unknown() {
cache.search_update(input.at());
sid = dfa
.next_state(cache, prev_sid, input.chunk()[input.chunk_pos])
.map_err(|_| gave_up(input.at()))?;
}
}
if sid.is_tagged() {
if sid.is_start() {
if let Some(pre) = pre {
let old_pos = input.at();
match literal::find(pre, input) {
None => {
cache.search_finish(input.at());
return Ok(mat);
}
Some(ref span) => {
if span.start > old_pos {
input.move_to(span.start);
if !universal_start {
sid = prefilter_restart(dfa, cache, input)?;
}
continue;
} else if input.at() != old_pos {
input.move_to(old_pos);
}
}
}
}
} else if sid.is_match() {
let pattern = dfa.match_pattern(cache, sid, 0);
mat = Some(HalfMatch::new(pattern, input.at()));
if earliest {
cache.search_finish(input.at());
return Ok(mat);
}
} else if sid.is_dead() {
cache.search_finish(input.at());
return Ok(mat);
} else if sid.is_quit() {
cache.search_finish(input.at());
return Err(MatchError::quit(input.chunk()[input.chunk_pos], input.at()));
} else {
debug_assert!(sid.is_unknown());
unreachable!("sid being unknown is a bug");
}
}
input.chunk_pos += 1;
}
eoi_fwd(dfa, cache, input, &mut sid, &mut mat)?;
cache.search_finish(input.end());
Ok(mat)
}
#[inline(never)]
pub(crate) fn find_rev<C: Cursor>(
dfa: &DFA,
cache: &mut Cache,
input: &mut Input<C>,
) -> Result<Option<HalfMatch>, MatchError> {
input.move_to(input.end());
if input.is_done() {
return Ok(None);
}
if input.get_earliest() {
find_rev_imp(dfa, cache, input, true)
} else {
find_rev_imp(dfa, cache, input, false)
}
}
#[cfg_attr(feature = "perf-inline", inline(always))]
fn find_rev_imp<C: Cursor>(
dfa: &DFA,
cache: &mut Cache,
input: &mut Input<C>,
earliest: bool,
) -> Result<Option<HalfMatch>, MatchError> {
let mut mat = None;
let mut sid = init_rev(dfa, cache, input)?;
if input.start() == input.end() || input.chunk_pos == 0 && !input.backtrack() {
eoi_rev(dfa, cache, input, &mut sid, &mut mat)?;
return Ok(mat);
}
input.chunk_pos -= 1;
macro_rules! next_unchecked {
($sid:expr) => {{
let byte = *input.chunk().get_unchecked(input.chunk_pos);
dfa.next_state_untagged_unchecked(cache, $sid, byte)
}};
}
#[rustfmt::skip]
macro_rules! ensure_chunk {
() => {
if input.chunk_pos == 0 && !input.backtrack() {
break;
}
};
}
cache.search_start(input.at());
loop {
if sid.is_tagged() {
cache.search_update(input.at());
sid = dfa
.next_state(cache, sid, input.chunk()[input.chunk_pos])
.map_err(|_| gave_up(input.at()))?;
} else {
let mut prev_sid = sid;
while input.at() >= input.start() {
prev_sid = unsafe { next_unchecked!(sid) };
if prev_sid.is_tagged() || input.at() <= input.start().saturating_add(3) {
core::mem::swap(&mut prev_sid, &mut sid);
break;
}
ensure_chunk!();
input.chunk_pos -= 1;
if input.chunk_pos <= 2 {
core::mem::swap(&mut prev_sid, &mut sid);
continue;
}
sid = unsafe { next_unchecked!(prev_sid) };
if sid.is_tagged() {
break;
}
input.chunk_pos -= 1;
prev_sid = unsafe { next_unchecked!(sid) };
if prev_sid.is_tagged() {
core::mem::swap(&mut prev_sid, &mut sid);
break;
}
input.chunk_pos -= 1;
sid = unsafe { next_unchecked!(prev_sid) };
if sid.is_tagged() {
break;
}
input.chunk_pos -= 1;
}
if sid.is_unknown() {
cache.search_update(input.at());
sid = dfa
.next_state(cache, prev_sid, input.chunk()[input.chunk_pos])
.map_err(|_| gave_up(input.at()))?;
}
}
if sid.is_tagged() {
if sid.is_start() {
} else if sid.is_match() {
let pattern = dfa.match_pattern(cache, sid, 0);
mat = Some(HalfMatch::new(pattern, input.at() + 1));
if earliest {
cache.search_finish(input.at());
return Ok(mat);
}
} else if sid.is_dead() {
cache.search_finish(input.at());
return Ok(mat);
} else if sid.is_quit() {
cache.search_finish(input.at());
return Err(MatchError::quit(input.chunk()[input.chunk_pos], input.at()));
} else {
debug_assert!(sid.is_unknown());
unreachable!("sid being unknown is a bug");
}
}
if input.at() <= input.start() {
break;
}
ensure_chunk!();
input.chunk_pos -= 1;
}
cache.search_finish(input.start());
eoi_rev(dfa, cache, input, &mut sid, &mut mat)?;
Ok(mat)
}
#[cfg_attr(feature = "perf-inline", inline(always))]
fn init_fwd<C: Cursor>(
dfa: &DFA,
cache: &mut Cache,
input: &mut Input<C>,
) -> Result<LazyStateID, MatchError> {
let look_behind = input.ensure_look_behind();
let start_config = start::Config::new().look_behind(look_behind).anchored(input.get_anchored());
dfa.start_state(cache, &start_config).map_err(|err| match err {
StartError::Quit { byte } => {
let offset = input.at().checked_sub(1).expect("no quit in start without look-behind");
MatchError::quit(byte, offset)
}
StartError::UnsupportedAnchored { mode } => MatchError::unsupported_anchored(mode),
StartError::Cache { .. } => MatchError::gave_up(input.end()),
_ => panic!("damm forward compatability"),
})
}
#[cfg_attr(feature = "perf-inline", inline(always))]
fn init_rev<C: Cursor>(
dfa: &DFA,
cache: &mut Cache,
input: &mut Input<C>,
) -> Result<LazyStateID, MatchError> {
let chunk_pos = input.chunk_pos();
let mut look_ahead = input.chunk().get(chunk_pos).copied();
if chunk_pos + input.chunk_offset() == input.slice_span.end {
look_ahead = None
} else if look_ahead.is_none() && input.advance() {
look_ahead = input.chunk().first().copied();
input.backtrack();
}
let start_config = start::Config::new().look_behind(look_ahead).anchored(input.get_anchored());
dfa.start_state(cache, &start_config).map_err(|err| match err {
StartError::Quit { byte } => {
let offset =
input.start().checked_sub(1).expect("no quit in start without look-behind");
MatchError::quit(byte, offset)
}
StartError::UnsupportedAnchored { mode } => MatchError::unsupported_anchored(mode),
StartError::Cache { .. } => MatchError::gave_up(input.end()),
_ => panic!("damm forward compatability"),
})
}
#[cfg_attr(feature = "perf-inline", inline(always))]
fn eoi_fwd<C: Cursor>(
dfa: &DFA,
cache: &mut Cache,
input: &mut Input<C>,
sid: &mut LazyStateID,
mat: &mut Option<HalfMatch>,
) -> Result<(), MatchError> {
let sp = input.get_span();
input.move_to(sp.end);
match input.chunk().get(sp.end - input.chunk_offset()) {
Some(&b) if sp.end != input.slice_span.end => {
*sid = dfa.next_state(cache, *sid, b).map_err(|_| gave_up(sp.end))?;
if sid.is_match() {
let pattern = dfa.match_pattern(cache, *sid, 0);
*mat = Some(HalfMatch::new(pattern, sp.end));
} else if sid.is_quit() {
return Err(MatchError::quit(b, sp.end));
}
}
_ => {
*sid = dfa.next_eoi_state(cache, *sid).map_err(|_| gave_up(sp.end))?;
if sid.is_match() {
let pattern = dfa.match_pattern(cache, *sid, 0);
*mat = Some(HalfMatch::new(pattern, sp.end));
}
debug_assert!(!sid.is_quit());
}
}
Ok(())
}
#[cfg_attr(feature = "perf-inline", inline(always))]
fn eoi_rev<C: Cursor>(
dfa: &DFA,
cache: &mut Cache,
input: &mut Input<C>,
sid: &mut LazyStateID,
mat: &mut Option<HalfMatch>,
) -> Result<(), MatchError> {
let sp = input.get_span();
if sp.start > input.slice_span.start {
input.move_to(input.start() - 1);
let byte = input.chunk()[sp.start - input.chunk_offset() - 1];
*sid = dfa.next_state(cache, *sid, byte).map_err(|_| gave_up(sp.start))?;
if sid.is_match() {
let pattern = dfa.match_pattern(cache, *sid, 0);
*mat = Some(HalfMatch::new(pattern, sp.start));
} else if sid.is_quit() {
return Err(MatchError::quit(byte, sp.start - 1));
}
} else {
*sid = dfa.next_eoi_state(cache, *sid).map_err(|_| gave_up(sp.start))?;
if sid.is_match() {
let pattern = dfa.match_pattern(cache, *sid, 0);
*mat = Some(HalfMatch::new(pattern, 0));
}
debug_assert!(!sid.is_quit());
}
Ok(())
}
#[cfg_attr(feature = "perf-inline", inline(always))]
fn gave_up(offset: usize) -> MatchError {
MatchError::gave_up(offset)
}
#[cfg_attr(feature = "perf-inline", inline(always))]
fn prefilter_restart<C: Cursor>(
dfa: &DFA,
cache: &mut Cache,
input: &mut Input<C>,
) -> Result<LazyStateID, MatchError> {
init_fwd(dfa, cache, input)
}