// resharp 0.5.2
//
// High-performance regex engine with intersection and complement operations.
// Documentation
use resharp::Regex;
use std::path::Path;

/// Data-driven test over `tests/stream.toml`.
///
/// Each entry of the top-level `test` array supplies a `pattern`, an optional `input`
/// (default: empty) and the expected `matches` as `[start, end]` pairs. The pattern is
/// compiled, the input is run through `Regex::stream`, and the reported spans must equal
/// the expected list. Entries with `vs_find_all = true` additionally require the stream
/// result to equal the `Regex::find_all` result.
#[test]
fn stream_toml() {
    let fixture = Path::new(env!("CARGO_MANIFEST_DIR"))
        .join("tests")
        .join("stream.toml");
    let text = std::fs::read_to_string(&fixture).unwrap();
    let doc: toml::Value = text.parse().unwrap();

    for case in doc["test"].as_array().unwrap() {
        // A missing or non-string `name` falls back to ""; it only appears in messages.
        let name = case.get("name").and_then(|v| v.as_str()).unwrap_or("");
        let pattern = case["pattern"].as_str().unwrap();
        let input = case
            .get("input")
            .and_then(|v| v.as_str())
            .unwrap_or("")
            .as_bytes();

        // Expected spans, in the order they are listed in the fixture.
        let mut want: Vec<(usize, usize)> = Vec::new();
        for span in case["matches"].as_array().unwrap() {
            let pair = span.as_array().unwrap();
            let start = pair[0].as_integer().unwrap() as usize;
            let end = pair[1].as_integer().unwrap() as usize;
            want.push((start, end));
        }

        let vs_find_all = case
            .get("vs_find_all")
            .and_then(|v| v.as_bool())
            .unwrap_or(false);

        let re = match Regex::new(pattern) {
            Ok(compiled) => compiled,
            Err(e) => panic!("{name}: compile: {e}"),
        };
        let streamed = re.stream(input).unwrap();
        let got: Vec<(usize, usize)> = streamed
            .iter()
            .map(|m| (m.start, m.end))
            .collect();
        assert_eq!(got, want, "name={name} pattern={pattern:?} input={input:?}");

        // Optional cross-check against the non-streaming search.
        if vs_find_all {
            let found = re.find_all(input).unwrap();
            assert_eq!(streamed, found, "name={name} stream != find_all");
        }
    }
}

/// Streams over a large synthetic haystack: `REPEATS` copies of a run of filler dots
/// followed by `Id="42" .`, and checks that `Id="\d+"` is reported exactly once per copy.
///
/// NOTE(review): the test name suggests this is meant to exercise a prefix-skip fast
/// path; nothing in this file confirms that, only the match count is asserted.
#[test]
fn test_stream_prefix_skip_helps() {
    // One constant drives both the haystack construction and the expected count.
    const REPEATS: usize = 50_000;
    const FILLER: &[u8] = b"............................................";
    const HIT: &[u8] = b"Id=\"42\" .";

    // Sized from the pieces actually written, so the buffer is allocated once and the
    // hint cannot drift from the contents.
    let mut data = Vec::with_capacity(REPEATS * (FILLER.len() + HIT.len()));
    for _ in 0..REPEATS {
        data.extend_from_slice(FILLER);
        data.extend_from_slice(HIT);
    }

    let re = Regex::new(r#"Id="\d+""#).unwrap();
    let m = re.stream(&data).unwrap();
    assert_eq!(m.len(), REPEATS);
}

/// `stream_with` must pass its callback the same matches, in the same order, that
/// `stream` returns for the same input, and must not invoke it at all on empty input.
#[test]
fn test_stream_with_callback() {
    let digits = Regex::new(r"\d+").unwrap();
    let haystack = b"a12 b34 c5 d6789";
    let expected = digits.stream(haystack).unwrap();

    // Collecting every callback argument reproduces the `stream` result.
    let mut collected = Vec::new();
    digits
        .stream_with(haystack, |hit| collected.push(hit))
        .unwrap();
    assert_eq!(collected, expected);

    // Counting invocations yields the number of matches.
    let mut calls: usize = 0;
    digits.stream_with(haystack, |_| calls += 1).unwrap();
    assert_eq!(calls, expected.len());

    // Empty input: the callback is never invoked.
    let mut invoked = false;
    digits.stream_with(b"", |_| invoked = true).unwrap();
    assert!(!invoked);
}

/// A literal split across two chunks (`"abc"` then `"def"`) must be reported by
/// `stream_chunk` exactly as `stream_ends` reports it for the unsplit input.
#[test]
fn test_cross_chunk_boundary() {
    let re = resharp::Regex::new("abcdef").unwrap();
    let pieces: [&[u8]; 2] = [b"abc", b"def"];

    let mut seen = Vec::new();
    // Thread the state returned by each call into the next one.
    let _last_state = pieces
        .iter()
        .copied()
        .fold(resharp::StreamState::new(), |state, piece| {
            re.stream_chunk(piece, state, |end| seen.push(end)).unwrap()
        });

    let expected = re.stream_ends(b"abcdef").unwrap();
    assert_eq!(seen, expected);
}

/// Feeding the input to `stream_chunk` in fixed-size pieces must produce the same
/// values as a single `stream_ends` call, for every chunk size tried (from one byte up
/// to the whole input).
#[test]
fn test_stream_chunk() {
    let re = Regex::new(r"\d+").unwrap();
    let haystack = b"a12 b34 c5 d6789";
    let expected = re.stream_ends(haystack).unwrap();

    let sizes = [1, 2, 3, 4, 7, 16, haystack.len()];
    for chunk_size in sizes {
        let mut seen = Vec::new();
        // Start from a fresh state and carry it from one piece to the next.
        let _last_state = haystack
            .chunks(chunk_size)
            .fold(resharp::StreamState::new(), |state, piece| {
                re.stream_chunk(piece, state, |end| seen.push(end)).unwrap()
            });
        assert_eq!(seen, expected, "chunk_size={chunk_size}");
    }
}


/// Walks the input with `seek_fwd` (starting at position 0) and with `seek_rev`
/// (starting at the end), feeding each returned state and position into the next call.
/// Compared against the spans reported by `stream`, forward seeks must yield the match
/// ends in order and reverse seeks the match starts in reverse order.
#[test]
fn seek_fwd_rev_cursor() {
    let re = Regex::new("a[bc]+d").unwrap();
    let input = b"xx abcd yy abbcd zz acd ww abd";
    let spans: Vec<(usize, usize)> = re
        .stream(input)
        .unwrap()
        .iter()
        .map(|m| (m.start, m.end))
        .collect();

    // Forward pass.
    let mut fwd_ends: Vec<usize> = Vec::new();
    let mut state = Regex::SEEK_INITIAL;
    let mut pos = 0usize;
    loop {
        match re.seek_fwd(input, state, pos).unwrap() {
            Some((next_state, end)) => {
                fwd_ends.push(end);
                state = next_state;
                pos = end;
            }
            None => break,
        }
    }
    let expected_ends: Vec<usize> = spans.iter().map(|&(_, end)| end).collect();
    assert_eq!(fwd_ends, expected_ends, "seek_fwd ends");

    // Reverse pass.
    let mut rev_starts: Vec<usize> = Vec::new();
    let mut state = Regex::SEEK_INITIAL;
    let mut pos = input.len();
    loop {
        match re.seek_rev(input, state, pos).unwrap() {
            Some((next_state, start)) => {
                rev_starts.push(start);
                state = next_state;
                pos = start;
            }
            None => break,
        }
    }
    let expected_starts: Vec<usize> = spans.iter().rev().map(|&(start, _)| start).collect();
    assert_eq!(rev_starts, expected_starts, "seek_rev starts");
}

/// A forward seek started at offset 20, which lies between the two occurrences of the
/// pattern, must report the end of the second occurrence.
#[test]
fn seek_fwd_from_middle() {
    let needle = b"lookaround";
    let re = Regex::new("lookaround").unwrap();
    let input = b"foo lookaround bar baz lookaround qux end";
    let from = 20;

    let found = re.seek_fwd(input, Regex::SEEK_INITIAL, from).unwrap();
    let (_, end) = found.unwrap();
    assert_eq!(end, 33);
    // The bytes just before the reported end are the pattern text itself.
    assert_eq!(&input[end - needle.len()..end], needle);
}

/// A reverse seek started at offset 20, which lies between the two occurrences of the
/// pattern, must report the start of the first occurrence.
#[test]
fn seek_rev_from_middle() {
    let needle = b"lookaround";
    let re = Regex::new("lookaround").unwrap();
    let input = b"foo lookaround bar baz lookaround qux end";
    let from = 20;

    let found = re.seek_rev(input, Regex::SEEK_INITIAL, from).unwrap();
    let (_, start) = found.unwrap();
    assert_eq!(start, 4);
    // The bytes beginning at the reported start are the pattern text itself.
    assert_eq!(&input[start..start + needle.len()], needle);
}

/// When the pattern does not occur in the input, both seek directions return `None`.
#[test]
fn seek_no_match() {
    let re = Regex::new("zzz").unwrap();
    let input = b"the quick brown fox jumps over the lazy dog";

    let forward = re.seek_fwd(input, Regex::SEEK_INITIAL, 10).unwrap();
    assert!(forward.is_none());

    let backward = re.seek_rev(input, Regex::SEEK_INITIAL, 30).unwrap();
    assert!(backward.is_none());
}

/// Fresh forward seeks from different start offsets: from 0 the first occurrence is
/// found (end 9); from 5, which is inside the first occurrence, the second one is
/// reported instead (end 19); from 20, past both, nothing is found.
#[test]
fn seek_fwd_skips_match_before_pos() {
    let re = Regex::new("abcdef").unwrap();
    let input = b"xx abcdef yy abcdef zz";

    // End offset of the first hit for a fresh seek starting at `pos`, if any.
    let end_from = |pos: usize| {
        re.seek_fwd(input, Regex::SEEK_INITIAL, pos)
            .unwrap()
            .map(|(_, end)| end)
    };

    assert_eq!(end_from(0).unwrap(), 9);
    assert_eq!(end_from(5).unwrap(), 19);
    assert!(end_from(20).is_none());
}

/// Repeated forward seeks for `\d+` starting at offset 8, each resuming from the
/// previously returned state and end position, must report the ends
/// 13, 14, 15, 16, 22 and 23, in that order.
#[test]
fn seek_fwd_with_class_pattern() {
    let re = Regex::new(r"\d+").unwrap();
    let input = b"abc 123 def 4567 ghi 89 jkl";

    let mut ends = Vec::new();
    let mut state = Regex::SEEK_INITIAL;
    let mut pos = 8usize;
    loop {
        match re.seek_fwd(input, state, pos).unwrap() {
            Some((next_state, end)) => {
                ends.push(end);
                state = next_state;
                pos = end;
            }
            None => break,
        }
    }

    assert_eq!(ends, vec![13, 14, 15, 16, 22, 23]);
}