use crate::error::PdfGrepError;
/// A compiled search pattern, backed by one of two regex implementations.
///
/// Values are produced by [`compile`], which picks the variant from its
/// `perl_regexp` flag.
#[derive(Debug)]
pub enum Engine {
/// Pattern compiled with the `regex` crate (used when `perl_regexp` is false).
Regex(regex::Regex),
/// Pattern compiled with the `fancy_regex` crate (used when `perl_regexp` is true).
Fancy(fancy_regex::Regex),
}
/// Compiles `pattern` into an [`Engine`].
///
/// * `fixed_strings` - treat `pattern` as literal text; it is passed through
///   `regex::escape` before compilation.
/// * `perl_regexp` - compile with `fancy_regex` instead of `regex`.
/// * `case_insensitive` - match without regard to letter case.
///
/// # Errors
///
/// Returns [`PdfGrepError::RegexCompile`] when the selected engine rejects the
/// pattern. The error carries the pattern exactly as the caller supplied it
/// (before escaping or flag injection) together with the engine's message.
pub fn compile(
    pattern: &str,
    fixed_strings: bool,
    perl_regexp: bool,
    case_insensitive: bool,
) -> Result<Engine, PdfGrepError> {
    // Both engines report failures the same way, quoting the caller's original pattern.
    let compile_error = |message: String| PdfGrepError::RegexCompile {
        pattern: pattern.to_string(),
        message,
    };

    let source = if fixed_strings {
        regex::escape(pattern)
    } else {
        pattern.to_string()
    };

    if !perl_regexp {
        return regex::RegexBuilder::new(&source)
            .case_insensitive(case_insensitive)
            .build()
            .map(Engine::Regex)
            .map_err(|err| compile_error(err.to_string()));
    }

    // Case-insensitivity is requested from `fancy_regex` through an inline flag.
    let source = if case_insensitive {
        format!("(?i){source}")
    } else {
        source
    };
    match fancy_regex::Regex::new(&source) {
        Ok(re) => Ok(Engine::Fancy(re)),
        Err(err) => Err(compile_error(err.to_string())),
    }
}
impl Engine {
    /// Returns the `(start, end)` byte offsets of every match in `text`, in
    /// the order they occur.
    ///
    /// For the `Fancy` engine the matches are collected by repeatedly
    /// searching from the end of the previous match. If the engine reports a
    /// runtime error part-way through, the scan stops and the matches found so
    /// far are returned; the error itself is not surfaced.
    #[must_use]
    pub fn find_all(&self, text: &str) -> Vec<(usize, usize)> {
        match self {
            Engine::Regex(r) => r.find_iter(text).map(|m| (m.start(), m.end())).collect(),
            Engine::Fancy(r) => {
                let mut out = Vec::new();
                let mut start = 0;
                // `<=` so that an empty match at the very end of `text` is still found.
                while start <= text.len() {
                    match r.find_from_pos(text, start) {
                        Ok(Some(m)) => {
                            let s = m.start();
                            let e = m.end();
                            out.push((s, e));
                            start = if e == s {
                                // An empty match must not be found again, so step
                                // forward. Step over a whole character rather than a
                                // single byte: `e + 1` can land inside a multi-byte
                                // UTF-8 sequence, which is not a valid search position.
                                let mut next = e + 1;
                                while next < text.len() && !text.is_char_boundary(next) {
                                    next += 1;
                                }
                                next
                            } else {
                                e
                            };
                        }
                        // No further match, or a runtime error: stop scanning and
                        // keep whatever was collected.
                        Ok(None) | Err(_) => break,
                    }
                }
                out
            }
        }
    }

    /// Reports whether the pattern matches anywhere in `text`.
    ///
    /// A runtime error from the `Fancy` engine is treated as "no match".
    #[must_use]
    pub fn is_match(&self, text: &str) -> bool {
        match self {
            Engine::Regex(r) => r.is_match(text),
            Engine::Fancy(r) => r.is_match(text).unwrap_or(false),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Without `perl_regexp` the `regex` engine is chosen and spans are byte offsets.
    #[test]
    fn regex_default_engine() {
        let engine = compile("foo+", false, false, false).unwrap();
        assert!(matches!(engine, Engine::Regex(_)));

        let spans = engine.find_all("xfoooy");
        assert_eq!(spans, vec![(1, 5)]);
    }

    /// With `fixed_strings` the parentheses are matched literally, not as a group.
    #[test]
    fn fixed_strings_escapes_metacharacters() {
        let engine = compile("(group)", true, false, false).unwrap();

        let with_parens = "here is (group) literally";
        let without_parens = "here is group without parens";
        assert!(engine.is_match(with_parens));
        assert!(!engine.is_match(without_parens));
    }

    /// `perl_regexp` selects the `fancy_regex` engine, which accepts look-ahead.
    #[test]
    fn perl_regexp_lookahead() {
        let engine = compile("foo(?=bar)", false, true, false).unwrap();
        assert!(matches!(engine, Engine::Fancy(_)));

        assert!(engine.is_match("foobar"));
        assert!(!engine.is_match("foobaz"));
    }

    /// Case-insensitive matching works for both engines.
    #[test]
    fn case_insensitive() {
        for perl_regexp in [false, true] {
            let engine = compile("HELLO", false, perl_regexp, true).unwrap();
            assert!(engine.is_match("hello world"));
        }
    }

    /// A malformed pattern is reported as `PdfGrepError::RegexCompile`.
    #[test]
    fn invalid_pattern_errors() {
        let outcome = compile("[invalid", false, false, false);
        let err = outcome.unwrap_err();
        assert!(matches!(err, PdfGrepError::RegexCompile { .. }));
    }
}