Skip to main content

rgx/engine/
pcre2.rs

1use super::{
2    CaptureGroup, CompiledRegex, EngineError, EngineFlags, EngineKind, EngineResult, Match,
3    RegexEngine,
4};
5
6pub struct Pcre2Engine;
7
8impl RegexEngine for Pcre2Engine {
9    fn kind(&self) -> EngineKind {
10        EngineKind::Pcre2
11    }
12
13    fn compile(&self, pattern: &str, flags: &EngineFlags) -> EngineResult<Box<dyn CompiledRegex>> {
14        let mut builder = pcre2::bytes::RegexBuilder::new();
15        builder.utf(true);
16        builder.ucp(flags.unicode);
17        builder.caseless(flags.case_insensitive);
18        builder.multi_line(flags.multi_line);
19        builder.dotall(flags.dot_matches_newline);
20        builder.extended(flags.extended);
21        builder.jit_if_available(true);
22
23        let re = builder
24            .build(pattern)
25            .map_err(|e| EngineError::CompileError(e.to_string()))?;
26
27        Ok(Box::new(Pcre2CompiledRegex { re }))
28    }
29}
30
31struct Pcre2CompiledRegex {
32    re: pcre2::bytes::Regex,
33}
34
35impl CompiledRegex for Pcre2CompiledRegex {
36    fn find_matches(&self, text: &str) -> EngineResult<Vec<Match>> {
37        let mut matches = Vec::new();
38        let bytes = text.as_bytes();
39
40        let mut offset = 0;
41        while offset <= bytes.len() {
42            let caps = match self.re.captures(&bytes[offset..]) {
43                Ok(Some(caps)) => caps,
44                Ok(None) => break,
45                Err(e) => return Err(EngineError::MatchError(e.to_string())),
46            };
47
48            let overall = caps.get(0).unwrap();
49            if overall.start() == overall.end() && overall.start() == 0 && offset > 0 {
50                offset += 1;
51                continue;
52            }
53
54            let abs_start = offset + overall.start();
55            let abs_end = offset + overall.end();
56
57            let mut captures = Vec::new();
58            for i in 1..caps.len() {
59                if let Some(m) = caps.get(i) {
60                    let cap_start = offset + m.start();
61                    let cap_end = offset + m.end();
62                    captures.push(CaptureGroup {
63                        index: i,
64                        name: None,
65                        start: cap_start,
66                        end: cap_end,
67                        text: text[cap_start..cap_end].to_string(),
68                    });
69                }
70            }
71
72            matches.push(Match {
73                start: abs_start,
74                end: abs_end,
75                text: text[abs_start..abs_end].to_string(),
76                captures,
77            });
78
79            if overall.start() == overall.end() {
80                offset += abs_end + 1;
81            } else {
82                offset = abs_end;
83            }
84        }
85
86        Ok(matches)
87    }
88}
89
#[cfg(test)]
mod tests {
    use super::*;

    /// Compiles `pattern` with default flags on a [`Pcre2Engine`] and returns all
    /// matches found in `haystack`, panicking if either step fails.
    fn matches_for(pattern: &str, haystack: &str) -> Vec<Match> {
        let engine = Pcre2Engine;
        let flags = EngineFlags::default();
        let compiled = engine.compile(pattern, &flags).unwrap();
        compiled.find_matches(haystack).unwrap()
    }

    /// A plain digit-run pattern finds both numbers in the input.
    #[test]
    fn test_simple_match() {
        let found = matches_for(r"\d+", "abc 123 def 456");
        assert_eq!(found.len(), 2);
        assert_eq!(found[0].text, "123");
    }

    /// Backreferences (`\1`) are supported by this engine.
    #[test]
    fn test_backreference() {
        let found = matches_for(r"(\w+) \1", "hello hello world");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].text, "hello hello");
    }

    /// Lookahead assertions are supported and are not part of the matched text.
    #[test]
    fn test_lookahead() {
        let found = matches_for(r"\w+(?=@)", "user@example.com");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].text, "user");
    }
}