use std::error::Error;
use regex_automata::{
dfa::{dense, Automaton, OverlappingState},
nfa::thompson,
Anchored, HalfMatch, Input, MatchError,
};
// Checks that forward searches stop with a quit error when they hit a byte
// that was configured as a quit byte. Here `x` is the quit byte and it sits at
// offset 3 of the haystack, so both the regular and the overlapping forward
// search must report `MatchError::quit(b'x', 3)`.
#[test]
fn quit_fwd() -> Result<(), Box<dyn Error>> {
    let config = dense::Config::new().quit(b'x', true);
    let dfa = dense::Builder::new().configure(config).build("[[:word:]]+$")?;
    let input = Input::new(b"abcxyz");

    // Regular (leftmost) forward search.
    let plain = dfa.try_search_fwd(&input);
    assert_eq!(Err(MatchError::quit(b'x', 3)), plain);

    // Overlapping forward search, starting from a fresh overlapping state.
    let mut overlapping = OverlappingState::start();
    let outcome = dfa.try_search_overlapping_fwd(&input, &mut overlapping);
    assert_eq!(outcome, Err(MatchError::quit(b'x', 3)),);

    Ok(())
}
// Checks that a reverse search stops with a quit error when it hits a byte
// that was configured as a quit byte. The DFA is compiled from a reversed
// Thompson NFA, `x` is the quit byte, and the expected error is
// `MatchError::quit(b'x', 3)`.
#[test]
fn quit_rev() -> Result<(), Box<dyn Error>> {
    let dense_config = dense::Config::new().quit(b'x', true);
    let nfa_config = thompson::Config::new().reverse(true);
    let dfa = dense::Builder::new()
        .configure(dense_config)
        .thompson(nfa_config)
        .build("^[[:word:]]+")?;

    let outcome = dfa.try_search_rev(&Input::new(b"abcxyz"));
    assert_eq!(Err(MatchError::quit(b'x', 3)), outcome);

    Ok(())
}
// Enabling Unicode word boundaries and then explicitly asking for the
// non-ASCII byte 0xFF to NOT be a quit byte is expected to panic (see
// `#[should_panic]`).
#[test]
#[should_panic]
fn quit_panics() {
    let config = dense::Config::new().unicode_word_boundary(true);
    // The returned config is intentionally discarded; only the panic matters.
    config.quit(b'\xFF', false);
}
// Marks every non-ASCII byte (0x80 through 0xFF) as a quit byte without ever
// calling `unicode_word_boundary`, then checks that a DFA for `\b` still
// builds and finds the word boundary at offset 1 of " a".
//
// NOTE(review): presumably this works because quitting on all non-ASCII bytes
// is what the Unicode word boundary heuristic needs -- confirm against the
// `dense::Config` documentation.
#[test]
fn unicode_word_implicitly_works() -> Result<(), Box<dyn Error>> {
    let config = (0x80u8..=0xFF)
        .fold(dense::Config::new(), |cfg, byte| cfg.quit(byte, true));
    let dfa = dense::Builder::new().configure(config).build(r"\b")?;

    let found = dfa.try_search_fwd(&Input::new(b" a"));
    assert_eq!(Ok(Some(HalfMatch::must(0, 1))), found);
    Ok(())
}
// Drives a DFA by hand, one byte at a time, beginning from its universal
// (unanchored) start state, and checks that the resulting half matches have
// the expected pattern and end offset. Every case is run against both the
// dense DFA and its sparse conversion.
#[test]
fn universal_start_search() -> Result<(), Box<dyn Error>> {
    // Manual unanchored forward search over `haystack`.
    //
    // Returns the last match observed before the automaton died, quit or ran
    // out of input. A quit state is only reported as an error when no match
    // has been recorded yet.
    fn find<A: Automaton>(
        dfa: &A,
        haystack: &[u8],
    ) -> Result<Option<HalfMatch>, MatchError> {
        let mut sid = dfa
            .universal_start_state(Anchored::No)
            .expect("regex should not require lookbehind");
        let mut found = None;
        for (at, &byte) in haystack.iter().enumerate() {
            sid = dfa.next_state(sid, byte);
            // Ordinary states need no further inspection.
            if !dfa.is_special_state(sid) {
                continue;
            }
            if dfa.is_match_state(sid) {
                found = Some(HalfMatch::new(dfa.match_pattern(sid, 0), at));
            } else if dfa.is_dead_state(sid) {
                // Dead state: stop and hand back whatever was found so far.
                return Ok(found);
            } else if dfa.is_quit_state(sid) {
                // A match seen before the quit state wins over the error.
                return match found {
                    Some(_) => Ok(found),
                    None => Err(MatchError::quit(byte, at)),
                };
            }
        }
        // Feed the end-of-input transition so that a match ending exactly at
        // the end of the haystack is still observed.
        let eoi = dfa.next_eoi_state(sid);
        if dfa.is_match_state(eoi) {
            found = Some(HalfMatch::new(
                dfa.match_pattern(eoi, 0),
                haystack.len(),
            ));
        }
        Ok(found)
    }

    // Runs `find` with one automaton and asserts on the pattern index and end
    // offset of the match. Panics if no match is found.
    fn check_impl(
        dfa: impl Automaton,
        haystack: &str,
        pat: usize,
        offset: usize,
    ) -> Result<(), Box<dyn Error>> {
        let mat = find(&dfa, haystack.as_bytes())?.unwrap();
        assert_eq!(mat.pattern().as_usize(), pat);
        assert_eq!(mat.offset(), offset);
        Ok(())
    }

    // Checks the expectation against the dense DFA first and then against the
    // sparse DFA derived from it.
    fn check(
        dfa: &dense::DFA<Vec<u32>>,
        haystack: &str,
        pat: usize,
        offset: usize,
    ) -> Result<(), Box<dyn Error>> {
        check_impl(dfa, haystack, pat, offset)?;
        let sparse = dfa.to_sparse()?;
        check_impl(sparse, haystack, pat, offset)
    }

    let haystack = "123 foobar 4567";

    let letters = dense::DFA::new(r"[a-z]+")?;
    check(&letters, haystack, 0, 10)?;

    let four_digits = dense::DFA::new(r"[0-9]{4}")?;
    check(&four_digits, haystack, 0, 15)?;

    // Multi-pattern DFA: (start of the haystack suffix, expected pattern,
    // expected end offset relative to that suffix).
    let either = dense::DFA::new_many(&[r"[a-z]+", r"[0-9]+"])?;
    for (start, pat, offset) in [(0, 1, 3), (3, 0, 7), (10, 1, 5)] {
        check(&either, &haystack[start..], pat, offset)?;
    }
    Ok(())
}