resharp 0.4.1

high-performance regex engine with intersection and complement operations
Documentation
use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};

fn data_dir() -> String {
    format!("{}/../data", env!("CARGO_MANIFEST_DIR"))
}

fn load_haystack(name: &str) -> String {
    let path = format!("{}/haystacks/{}", data_dir(), name);
    std::fs::read_to_string(&path).unwrap_or_else(|e| panic!("failed to load {}: {}", path, e))
}

fn load_dictionary_words(n: usize) -> Vec<String> {
    let path = format!("{}/regexes/length-15.txt", data_dir());
    let contents = std::fs::read_to_string(&path).unwrap();
    contents.lines().take(n).map(|s| s.to_string()).collect()
}

fn bench_dictionary_vs_aho(c: &mut Criterion) {
    let haystack = load_haystack("en-sampled.txt");
    let words = load_dictionary_words(2663);
    let pattern = words.join("|");
    let input = haystack.as_bytes();

    let mut group = c.benchmark_group("dictionary-vs-aho");
    group.throughput(Throughput::Bytes(input.len() as u64));

    let re_resharp = resharp::Regex::new(&pattern).unwrap();
    re_resharp.find_all(input).ok();
    group.bench_function("resharp", |b| {
        b.iter(|| black_box(re_resharp.find_all(black_box(input)).unwrap().len()));
    });

    let re_hardened =
        resharp::Regex::with_options(&pattern, resharp::EngineOptions::default().hardened(true))
            .unwrap();
    re_hardened.find_all(input).ok();
    group.bench_function("resharp-hardened", |b| {
        b.iter(|| black_box(re_hardened.find_all(black_box(input)).unwrap().len()));
    });

    group.finish();
}

fn bench_hardened_pathological(c: &mut Criterion) {
    let pattern = r".*[^A-Z]|[A-Z]";
    let re_default = resharp::Regex::new(pattern).unwrap();
    let re_hardened =
        resharp::Regex::with_options(pattern, resharp::EngineOptions::default().hardened(true))
            .unwrap();

    for &size in &[1_000, 10_000] {
        let input = "A".repeat(size);
        let input = input.as_bytes();
        let label = format!("hardened-pathological/{}K", size / 1000);
        let mut group = c.benchmark_group(&label);
        group.throughput(Throughput::Bytes(input.len() as u64));

        let re_d = &re_default;
        re_d.find_all(input).ok();
        group.bench_function("default", |b| {
            b.iter(|| black_box(re_d.find_all(black_box(input)).unwrap().len()));
        });

        let re_h = &re_hardened;
        re_h.find_all(input).ok();
        group.bench_function("hardened", |b| {
            b.iter(|| black_box(re_h.find_all(black_box(input)).unwrap().len()));
        });

        group.finish();
    }
}

criterion_group! {
    name = benches;
    config = Criterion::default().without_plots()
        .warm_up_time(std::time::Duration::from_secs(1))
        .measurement_time(std::time::Duration::from_secs(3))
        .sample_size(20);
    targets = bench_dictionary_vs_aho, bench_hardened_pathological
}
criterion_main!(benches);