use std::{fmt, io, str::FromStr};
pub use regex_automata::dfa::dense::BuildError;
use regex_automata::dfa::dense::DFA;
use regex_automata::dfa::Automaton;
use regex_automata::util::primitives::StateID;
use regex_automata::Anchored;
/// A compiled pattern, backed by an automaton of type `A`.
///
/// By default the automaton is a dense DFA (`DFA<Vec<u32>>`). The pattern also
/// records an anchoring mode, which is handed to the automaton when the start
/// state for a new [`Matcher`] is computed.
#[derive(Debug, Clone)]
pub struct Pattern<A = DFA<Vec<u32>>> {
/// The automaton that drives matching.
automaton: A,
/// Anchoring mode used when selecting the automaton's start state.
anchored: Anchored,
}
/// An in-progress match of a stream of bytes against an automaton.
///
/// Holds the automaton together with the state it has reached after the input
/// consumed so far.
#[derive(Debug, Clone)]
pub struct Matcher<A = DFA<Vec<u32>>> {
/// The automaton whose transitions are followed.
automaton: A,
/// The automaton state reached after all bytes fed so far.
state: StateID,
}
impl Pattern {
    /// Compiles `pattern` into an unanchored `Pattern`.
    ///
    /// An unanchored pattern may be preceded by non-matching input: the tests
    /// in this file show `a+b` built this way accepting `"bab"` and `"ffab"`.
    ///
    /// # Errors
    ///
    /// Returns the [`BuildError`] reported by `DFA::new` if the regex cannot
    /// be compiled.
    pub fn new(pattern: &str) -> Result<Self, BuildError> {
        DFA::new(pattern).map(|automaton| Self {
            automaton,
            anchored: Anchored::No,
        })
    }

    /// Compiles `pattern` into an anchored `Pattern`.
    ///
    /// An anchored pattern must match from the very first byte of the input:
    /// the tests in this file show `a+b` built this way rejecting `"bab"`.
    ///
    /// # Errors
    ///
    /// Returns the [`BuildError`] reported by `DFA::new` if the regex cannot
    /// be compiled.
    pub fn new_anchored(pattern: &str) -> Result<Self, BuildError> {
        DFA::new(pattern).map(|automaton| Self {
            automaton,
            anchored: Anchored::Yes,
        })
    }
}
impl FromStr for Pattern {
type Err = BuildError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Self::new(s)
}
}
impl<A: Automaton> Pattern<A> {
    /// Returns a fresh [`Matcher`] positioned at this pattern's start state.
    ///
    /// The matcher borrows the pattern's automaton, so any number of matchers
    /// can be created from one `Pattern`.
    ///
    /// # Panics
    ///
    /// Panics if the automaton cannot produce a start state for this
    /// pattern's anchoring mode.
    pub fn matcher(&self) -> Matcher<&'_ A> {
        let config = regex_automata::util::start::Config::new().anchored(self.anchored);
        // A failure here means the automaton was built without support for the
        // anchoring mode stored next to it, which is a bug rather than a
        // recoverable condition, so state the invariant in the panic message.
        let state = self
            .automaton
            .start_state(&config)
            .expect("automaton should provide a start state for the pattern's anchoring mode");
        Matcher {
            automaton: &self.automaton,
            state,
        }
    }

    /// Feeds the bytes of `s` through a fresh matcher and reports whether the
    /// automaton is in a match state at the end of the input.
    #[inline]
    pub fn matches(&self, s: &impl AsRef<str>) -> bool {
        self.matcher().matches(s)
    }

    /// Matches against the `fmt::Debug` output of `d`.
    ///
    /// The formatted output is streamed straight into the matcher rather than
    /// first being collected into a `String` by this code.
    #[inline]
    pub fn debug_matches(&self, d: &impl fmt::Debug) -> bool {
        self.matcher().debug_matches(d)
    }

    /// Matches against the `fmt::Display` output of `d`.
    ///
    /// The formatted output is streamed straight into the matcher rather than
    /// first being collected into a `String` by this code.
    #[inline]
    pub fn display_matches(&self, d: &impl fmt::Display) -> bool {
        self.matcher().display_matches(d)
    }

    /// Matches against the bytes produced by the reader `io`.
    ///
    /// # Errors
    ///
    /// Returns any I/O error produced while reading from `io`.
    #[inline]
    pub fn read_matches(&self, io: impl io::Read) -> io::Result<bool> {
        self.matcher().read_matches(io)
    }
}
impl<A> Matcher<A>
where
    A: Automaton,
{
    /// Feeds a single byte to the automaton and records the resulting state.
    #[inline]
    fn advance(&mut self, input: u8) {
        // SAFETY: the unchecked transition relies on `self.state` being a
        // state of `self.automaton`. In the code visible in this file, `state`
        // is only ever set from that automaton's own start state or from one
        // of its previous transitions, and both fields are private.
        let next = unsafe { self.automaton.next_state_unchecked(self.state, input) };
        self.state = next;
    }

    /// Returns `true` if, treating the input consumed so far as complete, the
    /// automaton ends in a match state.
    #[inline]
    pub fn is_matched(&self) -> bool {
        self.automaton
            .is_match_state(self.automaton.next_eoi_state(self.state))
    }

    /// Consumes the matcher, feeding it the bytes of `s`.
    ///
    /// Returns `false` as soon as the automaton reaches a dead state;
    /// otherwise returns [`is_matched`](Self::is_matched) once all bytes have
    /// been consumed.
    pub fn matches(mut self, s: &impl AsRef<str>) -> bool {
        // `any` short-circuits on the first byte that kills the automaton, so
        // no further bytes are fed after that point.
        let hit_dead_state = s.as_ref().bytes().any(|byte| {
            self.advance(byte);
            self.automaton.is_dead_state(self.state)
        });
        !hit_dead_state && self.is_matched()
    }

    /// Consumes the matcher, feeding it the `fmt::Debug` output of `d`, and
    /// reports whether the automaton ends in a match state.
    pub fn debug_matches(mut self, d: &impl fmt::Debug) -> bool {
        // Called through the trait path so that the `fmt::Write` impl is used
        // (this type also implements `io::Write`).
        fmt::Write::write_fmt(&mut self, format_args!("{:?}", d))
            .expect("matcher write impl should not fail");
        self.is_matched()
    }

    /// Consumes the matcher, feeding it the `fmt::Display` output of `d`, and
    /// reports whether the automaton ends in a match state.
    pub fn display_matches(mut self, d: &impl fmt::Display) -> bool {
        fmt::Write::write_fmt(&mut self, format_args!("{}", d))
            .expect("matcher write impl should not fail");
        self.is_matched()
    }

    /// Consumes the matcher, feeding it bytes read one at a time from `io`.
    ///
    /// Stops reading and returns `Ok(false)` as soon as the automaton reaches
    /// a dead state; otherwise reads to the end of `io` and returns
    /// [`is_matched`](Self::is_matched).
    ///
    /// # Errors
    ///
    /// Returns the first I/O error yielded while reading from `io`.
    pub fn read_matches(mut self, io: impl io::Read + Sized) -> io::Result<bool> {
        for next in io.bytes() {
            let byte = next?;
            self.advance(byte);
            if self.automaton.is_dead_state(self.state) {
                return Ok(false);
            }
        }
        let matched = self.is_matched();
        Ok(matched)
    }
}
/// Lets formatted output be streamed directly into a matcher.
impl<A: Automaton> fmt::Write for Matcher<A> {
    /// Advances the automaton over the bytes of `s`.
    ///
    /// Stops consuming bytes once the automaton reaches a dead state. Always
    /// returns `Ok(())`, whether or not a dead state was reached.
    fn write_str(&mut self, s: &str) -> fmt::Result {
        for byte in s.bytes() {
            self.advance(byte);
            if self.automaton.is_dead_state(self.state) {
                return Ok(());
            }
        }
        Ok(())
    }
}
/// Lets raw bytes be written directly into a matcher.
impl<A: Automaton> io::Write for Matcher<A> {
    /// Advances the automaton over `bytes` and returns how many were consumed.
    ///
    /// If the automaton reaches a dead state, the count covers every byte up
    /// to and including the one that caused it; otherwise it is `bytes.len()`.
    fn write(&mut self, bytes: &[u8]) -> Result<usize, io::Error> {
        for (index, &byte) in bytes.iter().enumerate() {
            self.advance(byte);
            if self.automaton.is_dead_state(self.state) {
                return Ok(index + 1);
            }
        }
        Ok(bytes.len())
    }

    /// Nothing is buffered, so flushing is a no-op.
    fn flush(&mut self) -> Result<(), io::Error> {
        Ok(())
    }
}
#[cfg(test)]
mod test {
    use super::*;

    /// Test input whose `Debug` and `Display` output is the raw string with no
    /// quoting or escaping, so both formatting paths feed the same bytes.
    struct Str<'a>(&'a str);

    /// `io::Read` adapter over the bytes of a [`Str`].
    struct ReadStr<'a>(io::Cursor<&'a [u8]>);

    impl<'a> fmt::Debug for Str<'a> {
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            write!(f, "{}", self.0)
        }
    }

    impl<'a> fmt::Display for Str<'a> {
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            write!(f, "{}", self.0)
        }
    }

    impl<'a> io::Read for ReadStr<'a> {
        fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
            self.0.read(buf)
        }
    }

    impl Str<'static> {
        fn hello_world() -> Self {
            Self::new("hello world")
        }
    }

    impl<'a> Str<'a> {
        fn new(s: &'a str) -> Self {
            Str(s)
        }

        /// Consumes the wrapper, yielding a reader over the same bytes.
        fn into_reader(self) -> ReadStr<'a> {
            ReadStr(io::Cursor::new(self.0.as_bytes()))
        }
    }

    /// Checks `debug_matches` against "hello world" for patterns built by
    /// `new_pattern`.
    fn test_debug_matches(new_pattern: impl Fn(&str) -> Result<Pattern, BuildError>) {
        let pat = new_pattern("hello world").unwrap();
        assert!(pat.debug_matches(&Str::hello_world()));

        let pat = new_pattern("hel+o w[orl]{3}d").unwrap();
        assert!(pat.debug_matches(&Str::hello_world()));

        let pat = new_pattern("goodbye world").unwrap();
        assert!(!pat.debug_matches(&Str::hello_world()));
    }

    /// Checks `display_matches` against "hello world" for patterns built by
    /// `new_pattern`.
    fn test_display_matches(new_pattern: impl Fn(&str) -> Result<Pattern, BuildError>) {
        let pat = new_pattern("hello world").unwrap();
        assert!(pat.display_matches(&Str::hello_world()));

        let pat = new_pattern("hel+o w[orl]{3}d").unwrap();
        assert!(pat.display_matches(&Str::hello_world()));

        let pat = new_pattern("goodbye world").unwrap();
        assert!(!pat.display_matches(&Str::hello_world()));
    }

    /// Checks `read_matches` against "hello world" for patterns built by
    /// `new_pattern`.
    fn test_reader_matches(new_pattern: impl Fn(&str) -> Result<Pattern, BuildError>) {
        let pat = new_pattern("hello world").unwrap();
        assert!(pat
            .read_matches(Str::hello_world().into_reader())
            .expect("no io error should occur"));

        let pat = new_pattern("hel+o w[orl]{3}d").unwrap();
        assert!(pat
            .read_matches(Str::hello_world().into_reader())
            .expect("no io error should occur"));

        let pat = new_pattern("goodbye world").unwrap();
        assert!(!pat
            .read_matches(Str::hello_world().into_reader())
            .expect("no io error should occur"));
    }

    /// Checks a repetition pattern, including inputs with trailing bytes after
    /// an otherwise matching prefix, which must not match.
    fn test_debug_rep_patterns(new_pattern: impl Fn(&str) -> Result<Pattern, BuildError>) {
        let pat = new_pattern("a+b").unwrap();
        assert!(pat.debug_matches(&Str::new("ab")));
        assert!(pat.debug_matches(&Str::new("aaaab")));
        assert!(pat.debug_matches(&Str::new("aaaaaaaaaab")));
        assert!(!pat.debug_matches(&Str::new("b")));
        assert!(!pat.debug_matches(&Str::new("abb")));
        assert!(!pat.debug_matches(&Str::new("aaaaabb")));
    }

    mod anchored {
        use super::*;

        #[test]
        fn debug_matches() {
            test_debug_matches(Pattern::new_anchored)
        }

        #[test]
        fn display_matches() {
            test_display_matches(Pattern::new_anchored)
        }

        #[test]
        fn reader_matches() {
            test_reader_matches(Pattern::new_anchored)
        }

        #[test]
        fn debug_rep_patterns() {
            test_debug_rep_patterns(Pattern::new_anchored)
        }

        /// An anchored pattern must reject input with a non-matching prefix.
        fn test_is_anchored(f: impl Fn(&Pattern, Str) -> bool) {
            let pat = Pattern::new_anchored("a+b").unwrap();
            assert!(f(&pat, Str::new("ab")));
            assert!(f(&pat, Str::new("aaaab")));
            assert!(f(&pat, Str::new("aaaaaaaaaab")));
            assert!(!f(&pat, Str::new("bab")));
            assert!(!f(&pat, Str::new("ffab")));
            assert!(!f(&pat, Str::new("qqqqqqqaaaaab")));
        }

        #[test]
        fn debug_is_anchored() {
            test_is_anchored(|pat, input| pat.debug_matches(&input))
        }

        #[test]
        fn display_is_anchored() {
            test_is_anchored(|pat, input| pat.display_matches(&input));
        }

        #[test]
        fn reader_is_anchored() {
            test_is_anchored(|pat, input| {
                pat.read_matches(input.into_reader())
                    .expect("no io error occurs")
            });
        }

        /// An anchored pattern that itself begins with `.*?` accepts arbitrary
        /// leading input.
        fn test_explicitly_unanchored(f: impl Fn(&Pattern, Str) -> bool) {
            let pat = Pattern::new_anchored(".*?a+b").unwrap();
            assert!(f(&pat, Str::new("ab")));
            assert!(f(&pat, Str::new("aaaab")));
            assert!(f(&pat, Str::new("aaaaaaaaaab")));
            assert!(f(&pat, Str::new("bab")));
            assert!(f(&pat, Str::new("ffab")));
            assert!(f(&pat, Str::new("qqqqqqqaaaaab")));
        }

        #[test]
        fn debug_explicitly_unanchored() {
            test_explicitly_unanchored(|pat, input| pat.debug_matches(&input))
        }

        #[test]
        fn display_explicitly_unanchored() {
            test_explicitly_unanchored(|pat, input| pat.display_matches(&input));
        }

        #[test]
        fn reader_explicitly_unanchored() {
            test_explicitly_unanchored(|pat, input| {
                pat.read_matches(input.into_reader())
                    .expect("no io error occurs")
            });
        }
    }

    mod unanchored {
        use super::*;

        #[test]
        fn debug_matches() {
            test_debug_matches(Pattern::new)
        }

        #[test]
        fn display_matches() {
            test_display_matches(Pattern::new)
        }

        #[test]
        fn reader_matches() {
            test_reader_matches(Pattern::new)
        }

        #[test]
        fn debug_rep_patterns() {
            test_debug_rep_patterns(Pattern::new)
        }

        /// An unanchored pattern must accept input with a non-matching prefix.
        fn test_is_unanchored(f: impl Fn(&Pattern, Str) -> bool) {
            let pat = Pattern::new("a+b").unwrap();
            assert!(f(&pat, Str::new("ab")));
            assert!(f(&pat, Str::new("aaaab")));
            assert!(f(&pat, Str::new("aaaaaaaaaab")));
            assert!(f(&pat, Str::new("bab")));
            assert!(f(&pat, Str::new("ffab")));
            assert!(f(&pat, Str::new("qqqfqqqqaaaaab")));
        }

        #[test]
        fn debug_is_unanchored() {
            test_is_unanchored(|pat, input| pat.debug_matches(&input))
        }

        #[test]
        fn display_is_unanchored() {
            test_is_unanchored(|pat, input| pat.display_matches(&input));
        }

        #[test]
        fn reader_is_unanchored() {
            test_is_unanchored(|pat, input| {
                pat.read_matches(input.into_reader())
                    .expect("no io error occurs")
            });
        }
    }
}