1use super::{
2 CaptureGroup, CompiledRegex, EngineError, EngineFlags, EngineKind, EngineResult, Match,
3 RegexEngine,
4};
5
6pub struct Pcre2Engine;
7
8impl RegexEngine for Pcre2Engine {
9 fn kind(&self) -> EngineKind {
10 EngineKind::Pcre2
11 }
12
13 fn compile(&self, pattern: &str, flags: &EngineFlags) -> EngineResult<Box<dyn CompiledRegex>> {
14 let mut builder = pcre2::bytes::RegexBuilder::new();
15 builder.utf(true);
16 builder.ucp(flags.unicode);
17 builder.caseless(flags.case_insensitive);
18 builder.multi_line(flags.multi_line);
19 builder.dotall(flags.dot_matches_newline);
20 builder.extended(flags.extended);
21 builder.jit_if_available(true);
22
23 let re = builder
24 .build(pattern)
25 .map_err(|e| EngineError::CompileError(e.to_string()))?;
26
27 Ok(Box::new(Pcre2CompiledRegex { re }))
28 }
29}
30
31struct Pcre2CompiledRegex {
32 re: pcre2::bytes::Regex,
33}
34
35impl CompiledRegex for Pcre2CompiledRegex {
36 fn find_matches(&self, text: &str) -> EngineResult<Vec<Match>> {
37 let mut matches = Vec::new();
38 let bytes = text.as_bytes();
39
40 let mut offset = 0;
41 while offset <= bytes.len() {
42 let caps = match self.re.captures(&bytes[offset..]) {
43 Ok(Some(caps)) => caps,
44 Ok(None) => break,
45 Err(e) => return Err(EngineError::MatchError(e.to_string())),
46 };
47
48 let overall = caps.get(0).unwrap();
49 if overall.start() == overall.end() && overall.start() == 0 && offset > 0 {
50 offset += 1;
51 continue;
52 }
53
54 let abs_start = offset + overall.start();
55 let abs_end = offset + overall.end();
56
57 let mut captures = Vec::new();
58 let names = self.re.capture_names();
59 for i in 1..caps.len() {
60 if let Some(m) = caps.get(i) {
61 let cap_start = offset + m.start();
62 let cap_end = offset + m.end();
63 let name = names.get(i).and_then(|n| n.clone());
64 captures.push(CaptureGroup {
65 index: i,
66 name,
67 start: cap_start,
68 end: cap_end,
69 text: text[cap_start..cap_end].to_string(),
70 });
71 }
72 }
73
74 matches.push(Match {
75 start: abs_start,
76 end: abs_end,
77 text: text[abs_start..abs_end].to_string(),
78 captures,
79 });
80
81 if overall.start() == overall.end() {
82 offset += abs_end + 1;
83 } else {
84 offset = abs_end;
85 }
86 }
87
88 Ok(matches)
89 }
90}
91
#[cfg(test)]
mod tests {
    use super::*;

    /// Compiles `pattern` with default flags and returns every match found in
    /// `haystack`, panicking if either compilation or matching fails.
    fn run(pattern: &str, haystack: &str) -> Vec<Match> {
        let regex = Pcre2Engine
            .compile(pattern, &EngineFlags::default())
            .unwrap();
        regex.find_matches(haystack).unwrap()
    }

    // Plain digit runs are found and the first one is reported first.
    #[test]
    fn test_simple_match() {
        let found = run(r"\d+", "abc 123 def 456");
        assert_eq!(found.len(), 2);
        assert_eq!(found[0].text, "123");
    }

    // Named groups expose both their name and their matched text.
    #[test]
    fn test_named_captures() {
        let found = run(r"(?P<user>\w+)@(?P<domain>\w+)", "user@example");
        assert_eq!(found.len(), 1);

        let groups = &found[0].captures;
        assert_eq!(groups.len(), 2);
        assert_eq!(groups[0].name, Some("user".to_string()));
        assert_eq!(groups[0].text, "user");
        assert_eq!(groups[1].name, Some("domain".to_string()));
        assert_eq!(groups[1].text, "example");
    }

    // Backreferences are supported by this engine.
    #[test]
    fn test_backreference() {
        let found = run(r"(\w+) \1", "hello hello world");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].text, "hello hello");
    }

    // Lookahead assertions constrain the match without being part of it.
    #[test]
    fn test_lookahead() {
        let found = run(r"\w+(?=@)", "user@example.com");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].text, "user");
    }
}