resharp 0.6.13

high-performance regex engine with intersection and complement operations
Documentation
use resharp::{Regex, RegexOptions, UnicodeMode};

/// Regex patterns exercised by the table-driven tests in this file.
///
/// The table mixes anchors, word boundaries, lookarounds, newline handling,
/// repetition, `(?m)` variants and alternations containing a lookbehind.
/// The tests that iterate this table skip any pattern that fails to compile
/// under a given configuration instead of treating it as a failure.
const PATTERNS: &[&str] = &[
    // Line/text anchors, alone and around a literal.
    r"^$",
    r"^",
    r"$",
    r"^a$",
    r"^a*$",
    // Word boundaries (including one before a non-ASCII letter).
    r"\b",
    r"\B",
    r"\ba\b",
    r"a\b",
    r"\bא",
    // Lookbehind / lookahead, positive and negative.
    r"(?<=a)b",
    r"(?<=a)",
    r"a(?=b)",
    r"(?=a)",
    r"(?!a)",
    r"(?<!a)b",
    // Newlines next to anchors.
    r"\n",
    r"$\n",
    r"^\n",
    // Dot, repetition and character classes.
    r".",
    r".*",
    r"a*",
    r"(a|b)*",
    r"a+",
    r"\d+",
    r"\w+",
    r"[^\n]",
    r"^.*$",
    // Anchors with the `(?m)` flag.
    r"(?m)^",
    r"(?m)$",
    r"(?m)^a$",
    // Word boundaries around `x`.
    r"\bx",
    r"x\b",
    r"\Bx\B",
    // Zero-count repetition (matches the empty string).
    r"a{0}",
    r"\ba{0}\b",
    // Lookbehind alone and as one branch of an alternation.
    r"(?<=\.)y",
    r"x|(?<=\.)y",
    r".|(?<=ab)y",
];

/// Byte-string haystacks that every pattern in `PATTERNS` is run against.
///
/// Includes the empty input, newline-only inputs of length one to three,
/// short ASCII words, inputs with embedded or surrounding newlines, inputs
/// containing `.` and spaces, and one non-ASCII input.
const INPUTS: &[&[u8]] = &[
    b"",
    b"\n",
    b"\n\n",
    b"\n\n\n",
    b"a",
    b"ab",
    b"abc",
    b"a\nb",
    b"a\n\nb",
    b".y",
    b".axy",
    b"  a  ",
    b"aaa",
    b"a.b.c",
    // UTF-8 encoding of U+05D0, the same letter used in the `\bא` pattern.
    b"\xd7\x90",
    b"x.y",
    b"abxaby",
    b"\na\n",
];

/// Compiles `p` with the given Unicode mode and `hardened` setting.
///
/// Returns `None` when compilation fails; the error itself is discarded so
/// callers can simply skip patterns unsupported in a configuration.
fn mk(p: &str, mode: UnicodeMode, hardened: bool) -> Option<Regex> {
    Regex::with_options(
        p,
        RegexOptions::default().unicode(mode).hardened(hardened),
    )
    .ok()
}

/// Checks that `is_match`, `find_all` and `find_anchored` agree with each
/// other for every compilable (pattern, Unicode mode) pair over every input.
///
/// Invariants enforced:
/// - `is_match` is true exactly when `find_all` returns at least one match;
/// - whenever `find_anchored` yields a match, `is_match` is true and that
///   match equals the first element of `find_all`.
///
/// Divergences are collected instead of asserted one by one, so a single run
/// reports the total count and prints at most the first ten.
#[test]
fn cross_api_consistency() {
    let mut fails: Vec<String> = Vec::new();

    for p in PATTERNS.iter().copied() {
        for mode in [UnicodeMode::Ascii, UnicodeMode::Default] {
            // A pattern that does not compile in this mode is skipped, not failed.
            let re = match mk(p, mode, false) {
                Some(re) => re,
                None => continue,
            };

            for inp in INPUTS.iter().copied() {
                let find_all = re.find_all(inp).unwrap();
                let is_match = re.is_match(inp).unwrap();
                // An `Err` from `find_anchored` is folded into "no anchored match".
                let anchored = re.find_anchored(inp).ok().flatten();

                // `is_match` must be the exact negation of "find_all is empty".
                if is_match == find_all.is_empty() {
                    fails.push(format!(
                        "is_match vs find_all: {p:?} {mode:?} {inp:?} is_match={is_match} find_all={find_all:?}"
                    ));
                }

                // The remaining checks only apply when an anchored match exists.
                let Some(am) = anchored else { continue };

                if !is_match {
                    fails.push(format!(
                        "find_anchored=Some but !is_match: {p:?} {mode:?} {inp:?}"
                    ));
                }
                if find_all.first() != Some(&am) {
                    fails.push(format!(
                        "find_anchored vs find_all leftmost: {p:?} {mode:?} {inp:?} anchored={am:?} find_all={find_all:?}"
                    ));
                }
            }
        }
    }

    // Cap the report at ten entries to keep the failure output readable.
    let shown = &fails[..fails.len().min(10)];
    assert!(
        fails.is_empty(),
        "{} divergences (showing first 10):\n{}",
        fails.len(),
        shown.join("\n")
    );
}

/// Compiles every pattern twice, once with `hardened` off and once with it
/// on, and requires `find_all` to return identical results on every input.
///
/// NOTE(review): the test name and failure message describe this as toggling
/// a prefilter; that `hardened(true)` is what disables it is assumed from that
/// naming and should be confirmed against the crate documentation.
#[test]
fn simd_prefilter_differential() {
    for p in PATTERNS.iter().copied() {
        for mode in [UnicodeMode::Ascii, UnicodeMode::Default] {
            // Both variants must compile; otherwise there is nothing to compare.
            let (fast, slow) = match (mk(p, mode, false), mk(p, mode, true)) {
                (Some(fast), Some(slow)) => (fast, slow),
                _ => continue,
            };

            for inp in INPUTS.iter().copied() {
                assert_eq!(
                    fast.find_all(inp).unwrap(),
                    slow.find_all(inp).unwrap(),
                    "prefilter on/off diverge: {p:?} {mode:?} {inp:?}"
                );
            }
        }
    }
}

/// Pins the result of `^$` on `"\n\n"`: three empty matches, at offsets
/// 0, 1 and 2.
#[test]
fn empty_anchor_newline_seed() {
    let re = Regex::new(r"^$").unwrap();

    // One zero-width match at each of the offsets 0, 1 and 2.
    let expected = (0..=2)
        .map(|at| resharp::Match { start: at, end: at })
        .collect::<Vec<_>>();

    assert_eq!(re.find_all(b"\n\n").unwrap(), expected);
}

/// Guards compile time for a `{0,64}` bounded repeat over the classes
/// `\w`, `\W`, `\D` and `\S`, in both Unicode modes.
///
/// Each pattern must compile successfully and do so in under one second of
/// wall-clock time.
#[test]
fn bounded_repeat_big_class_compile_budget() {
    use std::time::{Duration, Instant};

    let budget = Duration::from_secs(1);

    for c in [r"\w", r"\W", r"\D", r"\S"] {
        for mode in [UnicodeMode::Ascii, UnicodeMode::Default] {
            let pat = format!("{c}{{0,64}}");

            // The timed region covers option construction and compilation only.
            let t = Instant::now();
            let re = Regex::with_options(&pat, RegexOptions::default().unicode(mode));
            let elapsed = t.elapsed();

            assert!(re.is_ok(), "compile failed: {pat:?} {mode:?}");
            assert!(
                elapsed < budget,
                "bounded repeat compile budget exceeded: {pat:?} {mode:?} took {elapsed:?}"
            );
        }
    }
}