Skip to main content

rusty_pdfgrep/
engine.rs

//! Pluggable regex engine for pattern matching (FR-001..FR-005, AD-005).
//!
//! Default: `regex` crate (RE2-style, linear-time, no lookaround).
//! `-F`/`--fixed-strings`: escape the pattern with `regex::escape`, then compile it so it matches literally.
//! `-P`/`--perl-regexp`: `fancy-regex` (pure-Rust, PCRE-compatible).
6
7use crate::error::PdfGrepError;
8
9/// Selected match engine. Constructed once at startup via [`compile`].
10#[derive(Debug)]
11pub enum Engine {
12    /// Default RE2-style engine (`regex` crate).
13    Regex(regex::Regex),
14    /// PCRE-compatible engine (`fancy-regex` crate). Activated by `-P`.
15    Fancy(fancy_regex::Regex),
16}
17
18/// Compile a pattern into an [`Engine`].
19///
20/// # Errors
21///
22/// Returns [`PdfGrepError::RegexCompile`] on a malformed pattern.
23pub fn compile(
24    pattern: &str,
25    fixed_strings: bool,
26    perl_regexp: bool,
27    case_insensitive: bool,
28) -> Result<Engine, PdfGrepError> {
29    let prepared = if fixed_strings {
30        regex::escape(pattern)
31    } else {
32        pattern.to_string()
33    };
34
35    if perl_regexp {
36        let raw = if case_insensitive {
37            format!("(?i){prepared}")
38        } else {
39            prepared
40        };
41        fancy_regex::Regex::new(&raw)
42            .map(Engine::Fancy)
43            .map_err(|e| PdfGrepError::RegexCompile {
44                pattern: pattern.to_string(),
45                message: e.to_string(),
46            })
47    } else {
48        regex::RegexBuilder::new(&prepared)
49            .case_insensitive(case_insensitive)
50            .build()
51            .map(Engine::Regex)
52            .map_err(|e| PdfGrepError::RegexCompile {
53                pattern: pattern.to_string(),
54                message: e.to_string(),
55            })
56    }
57}
58
59impl Engine {
60    /// Find all matches in `text`. Returns `Vec<(start, end)>` byte spans
61    /// (within `text`). Allocations are minimized — caller can iterate the
62    /// returned vector or pass it to the formatter.
63    #[must_use]
64    pub fn find_all(&self, text: &str) -> Vec<(usize, usize)> {
65        match self {
66            Engine::Regex(r) => r.find_iter(text).map(|m| (m.start(), m.end())).collect(),
67            Engine::Fancy(r) => {
68                let mut out = Vec::new();
69                let mut start = 0;
70                while start <= text.len() {
71                    match r.find_from_pos(text, start) {
72                        Ok(Some(m)) => {
73                            let s = m.start();
74                            let e = m.end();
75                            out.push((s, e));
76                            start = if e == s { e + 1 } else { e };
77                        }
78                        Ok(None) => break,
79                        Err(_) => break,
80                    }
81                }
82                out
83            }
84        }
85    }
86
87    /// Quick yes/no check; used by `-q`, `-l`, `-L`.
88    #[must_use]
89    pub fn is_match(&self, text: &str) -> bool {
90        match self {
91            Engine::Regex(r) => r.is_match(text),
92            Engine::Fancy(r) => r.is_match(text).unwrap_or(false),
93        }
94    }
95}
96
#[cfg(test)]
mod tests {
    use super::*;

    /// With neither `-F` nor `-P` the `regex` crate is selected and spans are
    /// byte offsets into the haystack.
    #[test]
    fn regex_default_engine() {
        let engine = compile("foo+", false, false, false).unwrap();
        assert!(matches!(engine, Engine::Regex(_)));

        let spans = engine.find_all("xfoooy");
        assert_eq!(spans, vec![(1, 5)]);
    }

    /// With `-F`, `(group)` must match the literal parentheses rather than
    /// act as a capture group.
    #[test]
    fn fixed_strings_escapes_metacharacters() {
        let literal = compile("(group)", true, false, false).unwrap();

        let with_parens = "here is (group) literally";
        let without_parens = "here is group without parens";
        assert!(literal.is_match(with_parens));
        assert!(!literal.is_match(without_parens));
    }

    /// `-P` selects `fancy-regex`, which accepts lookahead.
    #[test]
    fn perl_regexp_lookahead() {
        let lookahead = compile("foo(?=bar)", false, true, false).unwrap();
        assert!(matches!(lookahead, Engine::Fancy(_)));

        assert!(lookahead.is_match("foobar"));
        assert!(!lookahead.is_match("foobaz"));
    }

    /// Case-insensitive matching works for both engines.
    #[test]
    fn case_insensitive() {
        let haystack = "hello world";

        let default_engine = compile("HELLO", false, false, true).unwrap();
        assert!(default_engine.is_match(haystack));

        let perl_engine = compile("HELLO", false, true, true).unwrap();
        assert!(perl_engine.is_match(haystack));
    }

    /// A malformed pattern surfaces as `PdfGrepError::RegexCompile`.
    #[test]
    fn invalid_pattern_errors() {
        let failure = compile("[invalid", false, false, false).unwrap_err();
        assert!(matches!(failure, PdfGrepError::RegexCompile { .. }));
    }
}