regex 1.1.8

An implementation of regular expressions for Rust. This implementation uses finite automata and guarantees linear time matching on all inputs.
Documentation
// Baseline case: a one-character literal pattern against an identical
// one-character haystack, with an expected span of (0, 1).
// NOTE(review): `mat!` is defined outside this view; its arguments appear to
// be (test name, pattern, haystack, expected match span) -- confirm.
mat!(ascii_literal, r"a", "a", Some((0, 1)));

// Some crazy expressions from regular-expressions.info.
// NOTE(review): `mat!` is defined outside this view; the last argument looks
// like the expected (start, end) span of the match, or `None` when no match
// is expected -- confirm against the macro definition.
//
// Integer in the range 0..=255 delimited by word boundaries: "255" is
// expected at offsets 5..8, while "256" is expected not to match at all.
mat!(match_ranges,
     r"\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
     "num: 255", Some((5, 8)));
mat!(match_ranges_not,
     r"\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
     "num: 256", None);
// Floating point literal: optional sign, optional integer digits, optional
// dot, then at least one digit. The unanchored variants are expected to pick
// out "0.1" from "0.1.2" and "1.2" from "a1.2"; the `^...$` variant is
// expected to reject "1.a".
mat!(match_float1, r"[-+]?[0-9]*\.?[0-9]+", "0.1", Some((0, 3)));
mat!(match_float2, r"[-+]?[0-9]*\.?[0-9]+", "0.1.2", Some((0, 3)));
mat!(match_float3, r"[-+]?[0-9]*\.?[0-9]+", "a1.2", Some((1, 4)));
mat!(match_float4, r"^[-+]?[0-9]*\.?[0-9]+$", "1.a", None);
// Case-insensitive e-mail address whose final label is 2 to 4 letters. The
// address embedded in the sentence is expected at offsets 8..26; the haystack
// without a ".xx" suffix after the domain is expected not to match.
mat!(match_email, r"(?i)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
     "mine is jam.slam@gmail.com ", Some((8, 26)));
mat!(match_email_not, r"(?i)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
     "mine is jam.slam@gmail ", None);
// A much longer lowercase-only e-mail pattern, run against the same haystack
// with the same expected span as `match_email`.
mat!(match_email_big, r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?",
     "mine is jam.slam@gmail.com ", Some((8, 26)));
// Fully anchored date: year 19xx or 20xx, month 01-12, day 01-31, separated
// by one of '-', ' ', '/' or '.'. Month "00" and month "13" are both expected
// to be rejected.
mat!(match_date1,
     r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
     "1900-01-01", Some((0, 10)));
mat!(match_date2,
     r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
     "1900-00-01", None);
mat!(match_date3,
     r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
     "1900-13-01", None);

// Do some crazy dancing with the start/end assertions.
// NOTE(review): `matiter!` is defined outside this view; the trailing
// arguments look like the full ordered list of expected (start, end) spans
// -- confirm against the macro definition.
//
// On an empty haystack, every combination of `^` and `$` below (repeated,
// stacked, or reversed) is expected to produce exactly one empty match at
// (0, 0).
matiter!(match_start_end_empty, r"^$", "", (0, 0));
matiter!(match_start_end_empty_many_1, r"^$^$^$", "", (0, 0));
matiter!(match_start_end_empty_many_2, r"^^^$$$", "", (0, 0));
matiter!(match_start_end_empty_rev, r"$^", "", (0, 0));
// The anchored group is wrapped in `*`, so zero repetitions are allowed; the
// expected result is an empty match at each of the offsets 0 through 5 of the
// five-byte haystack.
matiter!(match_start_end_empty_rep, r"(?:^$)*", "a\nb\nc",
         (0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5));
matiter!(match_start_end_empty_rep_rev, r"(?:$^)*", "a\nb\nc",
         (0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5));

// Test negated character classes.
// Each haystack starts with characters that the negated class excludes, so
// the expected span is that of the first character outside the excluded set.
// The cases cover literal letters, a comma, `\s`, and both orderings of
// comma and `\s` inside the brackets.
mat!(negclass_letters, r"[^ac]", "acx", Some((2, 3)));
mat!(negclass_letter_comma, r"[^a,]", "a,x", Some((2, 3)));
mat!(negclass_letter_space, r"[^a\s]", "a x", Some((2, 3)));
mat!(negclass_comma, r"[^,]", ",,x", Some((2, 3)));
mat!(negclass_space, r"[^\s]", " a", Some((1, 2)));
mat!(negclass_space_comma, r"[^,\s]", ", a", Some((2, 3)));
mat!(negclass_comma_space, r"[^\s,]", " ,a", Some((2, 3)));
// A POSIX-style `[:alpha:]` class nested inside a negated bracket expression:
// 'A' is excluded, so the digit at offset 1 is the expected match.
mat!(negclass_ascii, r"[^[:alpha:]Z]", "A1", Some((1, 2)));

// Test that repeated empty expressions don't loop forever.
// In every pattern below the inner group (`.*` or `.?`) can match the empty
// string and is itself wrapped in another repetition (`*`, `+`, `{1,}` or
// `{1,2}`). The haystack is always "a=b" and the expected span is always
// (0, 2), i.e. "a=".
//
// Lazy outer repetition (trailing `?`).
mat!(lazy_many_many, r"((?:.*)*?)=", "a=b", Some((0, 2)));
mat!(lazy_many_optional, r"((?:.?)*?)=", "a=b", Some((0, 2)));
mat!(lazy_one_many_many, r"((?:.*)+?)=", "a=b", Some((0, 2)));
mat!(lazy_one_many_optional, r"((?:.?)+?)=", "a=b", Some((0, 2)));
mat!(lazy_range_min_many, r"((?:.*){1,}?)=", "a=b", Some((0, 2)));
mat!(lazy_range_many, r"((?:.*){1,2}?)=", "a=b", Some((0, 2)));
// Greedy counterparts of the six patterns above.
mat!(greedy_many_many, r"((?:.*)*)=", "a=b", Some((0, 2)));
mat!(greedy_many_optional, r"((?:.?)*)=", "a=b", Some((0, 2)));
mat!(greedy_one_many_many, r"((?:.*)+)=", "a=b", Some((0, 2)));
mat!(greedy_one_many_optional, r"((?:.?)+)=", "a=b", Some((0, 2)));
mat!(greedy_range_min_many, r"((?:.*){1,})=", "a=b", Some((0, 2)));
mat!(greedy_range_many, r"((?:.*){1,2})=", "a=b", Some((0, 2)));

// Test that we handle various flavors of empty expressions.
// The empty pattern and the empty group (bare, repeated, optional, or
// doubled) are each expected to yield one empty match per offset: only
// (0, 0) on the empty haystack, and offsets 0 through 3 on "abc".
matiter!(match_empty1, r"", "", (0, 0));
matiter!(match_empty2, r"", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty3, r"()", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty4, r"()*", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty5, r"()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty6, r"()?", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty7, r"()()", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
// Alternations with an empty branch. 'z' never occurs in "abc", so the empty
// branch is the only one that can match regardless of branch order.
matiter!(match_empty8, r"()+|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty9, r"z|()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
// Branch order matters once the other branch can match: with the empty
// branch first, only empty matches are expected; with `b` first, the span
// (1, 2) is expected instead, and no empty match is expected at offset 2
// directly after it.
matiter!(match_empty10, r"()+|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty11, r"b|()+", "abc", (0, 0), (1, 2), (3, 3));

// Exercise the DFA with an input that is pathological for it.
// (The expectation is that the DFA's cache ends up being flushed too often,
// at which point it should give up and hand over to the NFA algorithm.)
#[test]
fn dfa_handles_pathological_case() {
    // Produces `count` characters, each randomly chosen to be '1' or '0'.
    fn random_bits(count: usize) -> String {
        use rand::{thread_rng, Rng};

        let mut rng = thread_rng();
        (0..count)
            .map(|_| if rng.gen() { '1' } else { '0' })
            .collect()
    }

    let re = regex!(r"[01]*1[01]{20}$");
    // A long random prefix, then a guaranteed '1', then exactly 20 more random
    // digits: by construction the haystack always satisfies the pattern.
    let haystack = format!("{}1{}", random_bits(100_000), random_bits(20));
    assert!(re.is_match(text!(&*haystack)));
}

// Compiles one very large pattern and requires that compilation succeeds.
//
// The pattern is a top-level alternation over leading digits 2 through 9,
// each branch containing several levels of nested non-capturing groups,
// digit classes and counted `\d` repetitions. It is passed to `RegexBuilder`
// with no builder options set, and the result of `build()` is unwrapped, so
// the test fails (by panicking) if the builder returns an error. Nothing is
// ever matched against the compiled regex.
//
// No `x` flag is set in the visible code, so the newlines and indentation
// inside the raw string are presumably part of the pattern itself; do not
// reformat the literal.
// NOTE(review): the test name suggests this guards against the parser's
// nesting limit rejecting the pattern -- confirm against the crate history.
#[test]
fn nest_limit_makes_it_parse() {
    use regex::RegexBuilder;

    RegexBuilder::new(
        r#"
        2(?:
          [45]\d{3}|
          7(?:
            1[0-267]|
            2[0-289]|
            3[0-29]|
            4[01]|
            5[1-3]|
            6[013]|
            7[0178]|
            91
          )|
          8(?:
            0[125]|
            [139][1-6]|
            2[0157-9]|
            41|
            6[1-35]|
            7[1-5]|
            8[1-8]|
            90
          )|
          9(?:
            0[0-2]|
            1[0-4]|
            2[568]|
            3[3-6]|
            5[5-7]|
            6[0167]|
            7[15]|
            8[0146-9]
          )
        )\d{4}|
        3(?:
          12?[5-7]\d{2}|
          0(?:
            2(?:
              [025-79]\d|
              [348]\d{1,2}
            )|
            3(?:
              [2-4]\d|
              [56]\d?
            )
          )|
          2(?:
            1\d{2}|
            2(?:
              [12]\d|
              [35]\d{1,2}|
              4\d?
            )
          )|
          3(?:
            1\d{2}|
            2(?:
              [2356]\d|
              4\d{1,2}
            )
          )|
          4(?:
            1\d{2}|
            2(?:
              2\d{1,2}|
              [47]|
              5\d{2}
            )
          )|
          5(?:
            1\d{2}|
            29
          )|
          [67]1\d{2}|
          8(?:
            1\d{2}|
            2(?:
              2\d{2}|
              3|
              4\d
            )
          )
        )\d{3}|
        4(?:
          0(?:
            2(?:
              [09]\d|
              7
            )|
            33\d{2}
          )|
          1\d{3}|
          2(?:
            1\d{2}|
            2(?:
              [25]\d?|
              [348]\d|
              [67]\d{1,2}
            )
          )|
          3(?:
            1\d{2}(?:
              \d{2}
            )?|
            2(?:
              [045]\d|
              [236-9]\d{1,2}
            )|
            32\d{2}
          )|
          4(?:
            [18]\d{2}|
            2(?:
              [2-46]\d{2}|
              3
            )|
            5[25]\d{2}
          )|
          5(?:
            1\d{2}|
            2(?:
              3\d|
              5
            )
          )|
          6(?:
            [18]\d{2}|
            2(?:
              3(?:
                \d{2}
              )?|
              [46]\d{1,2}|
              5\d{2}|
              7\d
            )|
            5(?:
              3\d?|
              4\d|
              [57]\d{1,2}|
              6\d{2}|
              8
            )
          )|
          71\d{2}|
          8(?:
            [18]\d{2}|
            23\d{2}|
            54\d{2}
          )|
          9(?:
            [18]\d{2}|
            2[2-5]\d{2}|
            53\d{1,2}
          )
        )\d{3}|
        5(?:
          02[03489]\d{2}|
          1\d{2}|
          2(?:
            1\d{2}|
            2(?:
              2(?:
                \d{2}
              )?|
              [457]\d{2}
            )
          )|
          3(?:
            1\d{2}|
            2(?:
              [37](?:
                \d{2}
              )?|
              [569]\d{2}
            )
          )|
          4(?:
            1\d{2}|
            2[46]\d{2}
          )|
          5(?:
            1\d{2}|
            26\d{1,2}
          )|
          6(?:
            [18]\d{2}|
            2|
            53\d{2}
          )|
          7(?:
            1|
            24
          )\d{2}|
          8(?:
            1|
            26
          )\d{2}|
          91\d{2}
        )\d{3}|
        6(?:
          0(?:
            1\d{2}|
            2(?:
              3\d{2}|
              4\d{1,2}
            )
          )|
          2(?:
            2[2-5]\d{2}|
            5(?:
              [3-5]\d{2}|
              7
            )|
            8\d{2}
          )|
          3(?:
            1|
            2[3478]
          )\d{2}|
          4(?:
            1|
            2[34]
          )\d{2}|
          5(?:
            1|
            2[47]
          )\d{2}|
          6(?:
            [18]\d{2}|
            6(?:
              2(?:
                2\d|
                [34]\d{2}
              )|
              5(?:
                [24]\d{2}|
                3\d|
                5\d{1,2}
              )
            )
          )|
          72[2-5]\d{2}|
          8(?:
            1\d{2}|
            2[2-5]\d{2}
          )|
          9(?:
            1\d{2}|
            2[2-6]\d{2}
          )
        )\d{3}|
        7(?:
          (?:
            02|
            [3-589]1|
            6[12]|
            72[24]
          )\d{2}|
          21\d{3}|
          32
        )\d{3}|
        8(?:
          (?:
            4[12]|
            [5-7]2|
            1\d?
          )|
          (?:
            0|
            3[12]|
            [5-7]1|
            217
          )\d
        )\d{4}|
        9(?:
          [35]1|
          (?:
            [024]2|
            81
          )\d|
          (?:
            1|
            [24]1
          )\d{2}
        )\d{3}
        "#
    )
    .build()
    .unwrap();
}