regex 1.3.3

An implementation of regular expressions for Rust. This implementation uses finite automata and guarantees linear time matching on all inputs.
Documentation
use regex::internal::ExecBuilder;

/// Given a regex, check if all of the backends produce the same
/// results on a number of different inputs.
///
/// For now this just throws quickcheck at the problem, which
/// is not very good because it only really tests half of the
/// problem space. It is pretty unlikely that a random string
/// will match any given regex, so this will probably just
/// be checking that the different backends fail in the same
/// way. This is still worthwhile to test, but is definitely not
/// the whole story.
///
/// TODO(ethan): In order to cover the other half of the problem
/// space, we should generate a random matching string by inspecting
/// the AST of the input regex. The right way to do this probably
/// involves adding a custom Arbitrary instance around a couple
/// of newtypes. That way we can respect the quickcheck size hinting
/// and shrinking and whatnot.
pub fn backends_are_consistent(re: &str) -> Result<u64, String> {
    let standard_backends = vec![
        (
            "bounded_backtracking_re",
            ExecBuilder::new(re)
                .bounded_backtracking()
                .build()
                .map(|exec| exec.into_regex())
                .map_err(|err| format!("{}", err))?,
        ),
        (
            "pikevm_re",
            ExecBuilder::new(re)
                .nfa()
                .build()
                .map(|exec| exec.into_regex())
                .map_err(|err| format!("{}", err))?,
        ),
        (
            "default_re",
            ExecBuilder::new(re)
                .build()
                .map(|exec| exec.into_regex())
                .map_err(|err| format!("{}", err))?,
        ),
    ];

    let utf8bytes_backends = vec![
        (
            "bounded_backtracking_utf8bytes_re",
            ExecBuilder::new(re)
                .bounded_backtracking()
                .bytes(true)
                .build()
                .map(|exec| exec.into_regex())
                .map_err(|err| format!("{}", err))?,
        ),
        (
            "pikevm_utf8bytes_re",
            ExecBuilder::new(re)
                .nfa()
                .bytes(true)
                .build()
                .map(|exec| exec.into_regex())
                .map_err(|err| format!("{}", err))?,
        ),
        (
            "default_utf8bytes_re",
            ExecBuilder::new(re)
                .bytes(true)
                .build()
                .map(|exec| exec.into_regex())
                .map_err(|err| format!("{}", err))?,
        ),
    ];

    let bytes_backends = vec![
        (
            "bounded_backtracking_bytes_re",
            ExecBuilder::new(re)
                .bounded_backtracking()
                .only_utf8(false)
                .build()
                .map(|exec| exec.into_byte_regex())
                .map_err(|err| format!("{}", err))?,
        ),
        (
            "pikevm_bytes_re",
            ExecBuilder::new(re)
                .nfa()
                .only_utf8(false)
                .build()
                .map(|exec| exec.into_byte_regex())
                .map_err(|err| format!("{}", err))?,
        ),
        (
            "default_bytes_re",
            ExecBuilder::new(re)
                .only_utf8(false)
                .build()
                .map(|exec| exec.into_byte_regex())
                .map_err(|err| format!("{}", err))?,
        ),
    ];

    Ok(string_checker::check_backends(&standard_backends)?
        + string_checker::check_backends(&utf8bytes_backends)?
        + bytes_checker::check_backends(&bytes_backends)?)
}

//
// A consistency checker parameterized by the input type (&str or &[u8]).
//

// Generates a module `$module_name` that checks a list of regexes of type
// `$regex_type` against each other using quickcheck.
//
// `$mk_input` is an expression callable with the quickcheck generator; it
// must produce an owned input whose reference can be passed to the regex
// search methods (`find`, `captures`, `find_iter`, `captures_iter`,
// `split`).
macro_rules! checker {
    ($module_name:ident, $regex_type:path, $mk_input:expr) => {
        mod $module_name {
            use quickcheck;
            use quickcheck::{Arbitrary, TestResult};

            /// Compares the first backend in `backends` against every
            /// other one and returns the sum of the counts reported by
            /// the individual quickcheck runs.
            ///
            /// An empty or single-element slice has nothing to compare
            /// and yields `Ok(0)`.
            ///
            /// # Errors
            ///
            /// Returns a description of the first pair of backends found
            /// to disagree.
            pub fn check_backends(
                backends: &[(&str, $regex_type)],
            ) -> Result<u64, String> {
                // `split_first` avoids the out-of-range panic that
                // slicing `[1..]` would cause on an empty list.
                let (reference, others) = match backends.split_first() {
                    Some(parts) => parts,
                    None => return Ok(0),
                };

                let mut total_passed = 0;
                for other in others {
                    total_passed += quickcheck_regex_eq(reference, other)?;
                }

                Ok(total_passed)
            }

            /// Runs a quickcheck comparison of two named regexes,
            /// converting a quickcheck failure into an error message that
            /// names both backends and shows both patterns.
            fn quickcheck_regex_eq(
                &(name1, ref re1): &(&str, $regex_type),
                &(name2, ref re2): &(&str, $regex_type),
            ) -> Result<u64, String> {
                quickcheck::QuickCheck::new()
                    .quicktest(RegexEqualityTest::new(
                        re1.clone(),
                        re2.clone(),
                    ))
                    .map_err(|err| {
                        // The space before the line continuation matters:
                        // `\` swallows the newline and the indentation of
                        // the next line.
                        format!(
                            "{}(/{}/) and {}(/{}/) are inconsistent. \
                             QuickCheck Err: {:?}",
                            name1, re1, name2, re2, err
                        )
                    })
            }

            /// A quickcheck property holding the two regexes that must
            /// behave identically.
            struct RegexEqualityTest {
                re1: $regex_type,
                re2: $regex_type,
            }
            impl RegexEqualityTest {
                fn new(re1: $regex_type, re2: $regex_type) -> Self {
                    RegexEqualityTest { re1: re1, re2: re2 }
                }
            }

            impl quickcheck::Testable for RegexEqualityTest {
                /// Generates one input and checks that both regexes
                /// produce the same results for `find`, `captures`,
                /// `find_iter`, `captures_iter` and `split`.
                fn result<G: quickcheck::Gen>(
                    &self,
                    gen: &mut G,
                ) -> TestResult {
                    let input = $mk_input(gen);
                    let input = &input;

                    // Builds the failure result for the named operation.
                    let mismatch = |what: &str| {
                        TestResult::error(format!(
                            "{} mismatch input={:?}",
                            what, input
                        ))
                    };

                    if self.re1.find(input) != self.re2.find(input) {
                        return mismatch("find");
                    }

                    let captures_agree = match (
                        self.re1.captures(input),
                        self.re2.captures(input),
                    ) {
                        (None, None) => true,
                        (Some(cap1), Some(cap2)) => {
                            cap1.iter().eq(cap2.iter())
                        }
                        _ => false,
                    };
                    if !captures_agree {
                        return mismatch("captures");
                    }

                    // `Iterator::eq` also compares lengths. Zipping would
                    // stop at the shorter sequence and hide a backend
                    // that reports extra or missing matches.
                    let fi1 = self.re1.find_iter(input);
                    let fi2 = self.re2.find_iter(input);
                    if !fi1.eq(fi2) {
                        return mismatch("find_iter");
                    }

                    // The items here are compared through their `iter()`
                    // rather than directly, so step both iterators
                    // together and treat one ending early as a mismatch.
                    let mut ci1 = self.re1.captures_iter(input);
                    let mut ci2 = self.re2.captures_iter(input);
                    loop {
                        match (ci1.next(), ci2.next()) {
                            (None, None) => break,
                            (Some(cap1), Some(cap2)) => {
                                if !cap1.iter().eq(cap2.iter()) {
                                    return mismatch("captures_iter");
                                }
                            }
                            _ => return mismatch("captures_iter"),
                        }
                    }

                    let s1 = self.re1.split(input);
                    let s2 = self.re2.split(input);
                    if !s1.eq(s2) {
                        return mismatch("split");
                    }

                    TestResult::from_bool(true)
                }
            }
        } // mod
    }; // rule case
} // macro_rules!

// `string_checker`: compares `::regex::Regex` values on arbitrary `String`
// inputs.
checker!(string_checker, ::regex::Regex, |rng| String::arbitrary(rng));
// `bytes_checker`: compares `::regex::bytes::Regex` values on arbitrary
// `Vec<u8>` inputs.
checker!(bytes_checker, ::regex::bytes::Regex, |rng| {
    Vec::<u8>::arbitrary(rng)
});