// grep-matcher 0.1.3
//
// A trait for regular expressions, with a focus on line-oriented search.
// Documentation
use grep_matcher::{Captures, Match, Matcher};
use regex::bytes::Regex;

use util::{RegexMatcher, RegexMatcherNoCaps};

/// Builds the `RegexMatcher` test helper for `pattern`.
///
/// The pattern is compiled with `regex::bytes::Regex`; an invalid pattern
/// panics via `unwrap`.
fn matcher(pattern: &str) -> RegexMatcher {
    let compiled = Regex::new(pattern).unwrap();
    RegexMatcher::new(compiled)
}

/// Builds the `RegexMatcherNoCaps` test helper for `pattern`.
///
/// The pattern is compiled with `regex::bytes::Regex`; an invalid pattern
/// panics via `unwrap`.
fn matcher_no_caps(pattern: &str) -> RegexMatcherNoCaps {
    let compiled = Regex::new(pattern).unwrap();
    RegexMatcherNoCaps(compiled)
}

/// Shorthand for `Match::new(start, end)`, used to keep the expected spans
/// in the assertions of this file compact.
fn m(start: usize, end: usize) -> Match {
    Match::new(start, end)
}

// Test that `find` reports the span of the match, which here excludes the
// leading and trailing space of the haystack.
#[test]
fn find() {
    let re = matcher(r"(\w+)\s+(\w+)");
    let found = re.find(b" homer simpson ").unwrap();
    assert_eq!(found, Some(m(1, 14)));
}

// Test that `find_iter` hands every match to the callback while the callback
// keeps returning `true`, and stops after the first `false`.
#[test]
fn find_iter() {
    let haystack = b"aa bb cc dd";
    let re = matcher(r"(\w+)\s+(\w+)");
    let mut seen: Vec<Match> = Vec::new();

    // The callback always asks to continue, so both matches are collected.
    re.find_iter(haystack, |found| {
        seen.push(found);
        true
    }).unwrap();
    assert_eq!(seen, vec![m(0, 5), m(6, 11)]);

    // Test that find_iter respects short circuiting.
    seen.clear();
    re.find_iter(haystack, |found| {
        seen.push(found);
        false
    }).unwrap();
    assert_eq!(seen, vec![m(0, 5)]);
}

// Test that an error returned by the `try_find_iter` callback is handed back
// to the caller, and that only the match seen before the error was recorded.
#[test]
fn try_find_iter() {
    #[derive(Clone, Debug, Eq, PartialEq)]
    struct MyError;

    let re = matcher(r"(\w+)\s+(\w+)");
    let mut seen: Vec<Match> = Vec::new();
    let outcome = re.try_find_iter(b"aa bb cc dd", |found| {
        // Accept the first match only; fail on any later invocation.
        if !seen.is_empty() {
            return Err(MyError);
        }
        seen.push(found);
        Ok(true)
    }).unwrap();
    // The outer `Result` is the matcher's own; the inner one carries the
    // callback's error.
    let err = outcome.unwrap_err();
    assert_eq!(seen, vec![m(0, 5)]);
    assert_eq!(err, MyError);
}

// Test the `shortest_match` result of the test matcher against the one
// produced by the regex it wraps.
#[test]
fn shortest_match() {
    let haystack = b"aaa";
    let wrapper = matcher(r"a+");

    // This tests that the default impl isn't doing anything smart, and simply
    // defers to `find`.
    let via_trait = wrapper.shortest_match(haystack).unwrap();
    assert_eq!(via_trait, Some(3));

    // The actual underlying regex is smarter.
    let via_regex = wrapper.re.shortest_match(haystack);
    assert_eq!(via_regex, Some(1));
}

// Test the capture group count, the name-to-index lookup, and the spans that
// `captures` records for each group.
#[test]
fn captures() {
    let re = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
    assert_eq!(re.capture_count(), 3);

    // Each named group resolves to its index; an unknown name resolves to
    // nothing.
    let lookups: [(&str, Option<usize>); 3] =
        [("a", Some(1)), ("b", Some(2)), ("nada", None)];
    for &(name, expected) in lookups.iter() {
        assert_eq!(re.capture_index(name), expected);
    }

    let mut groups = re.new_captures().unwrap();
    let matched = re.captures(b" homer simpson ", &mut groups).unwrap();
    assert!(matched);

    // Expected spans, ordered by group index starting at 0.
    let expected_spans = vec![m(1, 14), m(1, 6), m(7, 14)];
    for (index, span) in expected_spans.into_iter().enumerate() {
        assert_eq!(groups.get(index), Some(span));
    }
}

// Test that `captures_iter` invokes the callback once per match with the
// groups filled in, and stops as soon as the callback returns `false`.
#[test]
fn captures_iter() {
    let haystack = b"aa bb cc dd";
    let re = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
    let mut groups = re.new_captures().unwrap();
    let mut seen: Vec<Match> = Vec::new();

    // Record groups 0, 1 and 2 (in that order) for every match.
    re.captures_iter(haystack, &mut groups, |found| {
        seen.extend((0..3).map(|index| found.get(index).unwrap()));
        true
    }).unwrap();
    let every_match = vec![
        m(0, 5), m(0, 2), m(3, 5),
        m(6, 11), m(6, 8), m(9, 11),
    ];
    assert_eq!(seen, every_match);

    // Test that captures_iter respects short circuiting.
    seen.clear();
    re.captures_iter(haystack, &mut groups, |found| {
        seen.extend((0..3).map(|index| found.get(index).unwrap()));
        false
    }).unwrap();
    let first_match_only = vec![m(0, 5), m(0, 2), m(3, 5)];
    assert_eq!(seen, first_match_only);
}

// Test that an error returned by the `try_captures_iter` callback is handed
// back to the caller, and that only the groups of the match seen before the
// error were recorded.
#[test]
fn try_captures_iter() {
    #[derive(Clone, Debug, Eq, PartialEq)]
    struct MyError;

    let re = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
    let mut groups = re.new_captures().unwrap();
    let mut seen: Vec<Match> = Vec::new();
    let outcome = re.try_captures_iter(b"aa bb cc dd", &mut groups, |found| {
        // Accept the first match only; fail on any later invocation.
        if !seen.is_empty() {
            return Err(MyError);
        }
        for index in 0..3 {
            seen.push(found.get(index).unwrap());
        }
        Ok(true)
    }).unwrap();
    // The outer `Result` is the matcher's own; the inner one carries the
    // callback's error.
    let err = outcome.unwrap_err();
    assert_eq!(seen, vec![m(0, 5), m(0, 2), m(3, 5)]);
    assert_eq!(err, MyError);
}

// Test that our default impls for capturing are correct. Namely, when
// capturing isn't supported by the underlying matcher, then all of the
// various capturing related APIs fail fast.
#[test]
fn no_captures() {
    let haystack = b"homer simpson";
    let re = matcher_no_caps(r"(?P<a>\w+)\s+(?P<b>\w+)");

    // No groups are reported, and no name resolves to an index, even though
    // the pattern itself declares the groups `a` and `b`.
    assert_eq!(re.capture_count(), 0);
    for &name in ["a", "b", "nada"].iter() {
        assert_eq!(re.capture_index(name), None);
    }

    let mut groups = re.new_captures().unwrap();
    let matched = re.captures(haystack, &mut groups).unwrap();
    assert!(!matched);

    // The iteration callback must never run.
    let mut callback_ran = false;
    re.captures_iter(haystack, &mut groups, |_| {
        callback_ran = true;
        true
    }).unwrap();
    assert!(!callback_ran);
}

// Test that `replace` substitutes the callback's output for each match while
// copying unmatched bytes through, and that it stops replacing once the
// callback returns `false`.
#[test]
fn replace() {
    let haystack = b"aa bb cc dd";
    let re = matcher(r"(\w+)\s+(\w+)");
    let mut out: Vec<u8> = Vec::new();

    // Both matches are replaced by a single `z`; the space between them is
    // kept.
    re.replace(haystack, &mut out, |_, buf| {
        buf.push(b'z');
        true
    }).unwrap();
    assert_eq!(out, b"z z");

    // Test that replacements respect short circuiting.
    out.clear();
    re.replace(haystack, &mut out, |_, buf| {
        buf.push(b'z');
        false
    }).unwrap();
    assert_eq!(out, b"z cc dd");
}

// Test that `replace_with_captures` exposes the groups of each match to the
// callback (here used to swap the two words via `interpolate`), and that it
// stops replacing once the callback returns `false`.
#[test]
fn replace_with_captures() {
    let haystack = b"aa bb cc dd";
    // Template that emits the second group, a space, then the first group.
    let template = b"$2 $1";
    let re = matcher(r"(\w+)\s+(\w+)");
    let mut groups = re.new_captures().unwrap();
    let mut out: Vec<u8> = Vec::new();

    re.replace_with_captures(haystack, &mut groups, &mut out, |found, buf| {
        found.interpolate(
            |name| re.capture_index(name),
            haystack,
            template,
            buf,
        );
        true
    }).unwrap();
    assert_eq!(out, b"bb aa dd cc");

    // Test that replacements respect short circuiting.
    out.clear();
    re.replace_with_captures(haystack, &mut groups, &mut out, |found, buf| {
        found.interpolate(
            |name| re.capture_index(name),
            haystack,
            template,
            buf,
        );
        false
    }).unwrap();
    assert_eq!(out, b"bb aa cc dd");
}