Skip to main content

rgx/engine/
pcre2.rs

1use super::{
2    CaptureGroup, CompiledRegex, EngineError, EngineFlags, EngineKind, EngineResult, Match,
3    RegexEngine,
4};
5
/// Regex engine backend that compiles and runs patterns through the `pcre2` crate.
///
/// The type carries no state, so it is free to copy, and `Pcre2Engine::default()`
/// is equivalent to writing `Pcre2Engine` directly.
#[derive(Debug, Clone, Copy, Default)]
pub struct Pcre2Engine;
7
8impl RegexEngine for Pcre2Engine {
9    fn kind(&self) -> EngineKind {
10        EngineKind::Pcre2
11    }
12
13    fn compile(&self, pattern: &str, flags: &EngineFlags) -> EngineResult<Box<dyn CompiledRegex>> {
14        let mut builder = pcre2::bytes::RegexBuilder::new();
15        builder.utf(true);
16        builder.ucp(flags.unicode);
17        builder.caseless(flags.case_insensitive);
18        builder.multi_line(flags.multi_line);
19        builder.dotall(flags.dot_matches_newline);
20        builder.extended(flags.extended);
21        builder.jit_if_available(true);
22
23        let re = builder
24            .build(pattern)
25            .map_err(|e| EngineError::CompileError(e.to_string()))?;
26
27        Ok(Box::new(Pcre2CompiledRegex { re }))
28    }
29}
30
31struct Pcre2CompiledRegex {
32    re: pcre2::bytes::Regex,
33}
34
35impl CompiledRegex for Pcre2CompiledRegex {
36    fn find_matches(&self, text: &str) -> EngineResult<Vec<Match>> {
37        let mut matches = Vec::new();
38        let bytes = text.as_bytes();
39
40        let mut offset = 0;
41        while offset <= bytes.len() {
42            let caps = match self.re.captures(&bytes[offset..]) {
43                Ok(Some(caps)) => caps,
44                Ok(None) => break,
45                Err(e) => return Err(EngineError::MatchError(e.to_string())),
46            };
47
48            let overall = caps.get(0).unwrap();
49            if overall.start() == overall.end() && overall.start() == 0 && offset > 0 {
50                offset += 1;
51                continue;
52            }
53
54            let abs_start = offset + overall.start();
55            let abs_end = offset + overall.end();
56
57            let mut captures = Vec::new();
58            let names = self.re.capture_names();
59            for i in 1..caps.len() {
60                if let Some(m) = caps.get(i) {
61                    let cap_start = offset + m.start();
62                    let cap_end = offset + m.end();
63                    let name = names.get(i).and_then(|n| n.clone());
64                    captures.push(CaptureGroup {
65                        index: i,
66                        name,
67                        start: cap_start,
68                        end: cap_end,
69                        text: text[cap_start..cap_end].to_string(),
70                    });
71                }
72            }
73
74            matches.push(Match {
75                start: abs_start,
76                end: abs_end,
77                text: text[abs_start..abs_end].to_string(),
78                captures,
79            });
80
81            if overall.start() == overall.end() {
82                offset += abs_end + 1;
83            } else {
84                offset = abs_end;
85            }
86        }
87
88        Ok(matches)
89    }
90}
91
#[cfg(test)]
mod tests {
    use super::*;

    /// Compiles `pattern` with default flags and returns all matches in `text`.
    fn run(pattern: &str, text: &str) -> Vec<Match> {
        let compiled = Pcre2Engine
            .compile(pattern, &EngineFlags::default())
            .unwrap();
        compiled.find_matches(text).unwrap()
    }

    #[test]
    fn test_simple_match() {
        let found = run(r"\d+", "abc 123 def 456");
        assert_eq!(found.len(), 2);
        assert_eq!(found[0].text, "123");
    }

    #[test]
    fn test_named_captures() {
        let found = run(r"(?P<user>\w+)@(?P<domain>\w+)", "user@example");
        assert_eq!(found.len(), 1);

        let groups = &found[0].captures;
        assert_eq!(groups.len(), 2);
        assert_eq!(groups[0].name, Some("user".to_string()));
        assert_eq!(groups[0].text, "user");
        assert_eq!(groups[1].name, Some("domain".to_string()));
        assert_eq!(groups[1].text, "example");
    }

    #[test]
    fn test_backreference() {
        let found = run(r"(\w+) \1", "hello hello world");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].text, "hello hello");
    }

    #[test]
    fn test_lookahead() {
        let found = run(r"\w+(?=@)", "user@example.com");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].text, "user");
    }
}