1use super::{
2 CaptureGroup, CompiledRegex, EngineError, EngineFlags, EngineKind, EngineResult, Match,
3 RegexEngine,
4};
5
/// Regex engine backend that compiles and runs patterns with PCRE2.
///
/// The type carries no state; every instance is interchangeable.
#[derive(Debug, Clone, Copy, Default)]
pub struct Pcre2Engine;
7
8impl RegexEngine for Pcre2Engine {
9 fn kind(&self) -> EngineKind {
10 EngineKind::Pcre2
11 }
12
13 fn compile(&self, pattern: &str, flags: &EngineFlags) -> EngineResult<Box<dyn CompiledRegex>> {
14 let mut builder = pcre2::bytes::RegexBuilder::new();
15 builder.utf(true);
16 builder.ucp(flags.unicode);
17 builder.caseless(flags.case_insensitive);
18 builder.multi_line(flags.multi_line);
19 builder.dotall(flags.dot_matches_newline);
20 builder.extended(flags.extended);
21 builder.jit_if_available(true);
22
23 let re = builder
24 .build(pattern)
25 .map_err(|e| EngineError::CompileError(e.to_string()))?;
26
27 Ok(Box::new(Pcre2CompiledRegex { re }))
28 }
29}
30
31struct Pcre2CompiledRegex {
32 re: pcre2::bytes::Regex,
33}
34
35impl CompiledRegex for Pcre2CompiledRegex {
36 fn find_matches(&self, text: &str) -> EngineResult<Vec<Match>> {
37 let mut matches = Vec::new();
38 let bytes = text.as_bytes();
39
40 let mut offset = 0;
41 while offset <= bytes.len() {
42 let caps = match self.re.captures(&bytes[offset..]) {
43 Ok(Some(caps)) => caps,
44 Ok(None) => break,
45 Err(e) => return Err(EngineError::MatchError(e.to_string())),
46 };
47
48 let overall = caps.get(0).unwrap();
49 if overall.start() == overall.end() && overall.start() == 0 && offset > 0 {
50 offset += 1;
51 continue;
52 }
53
54 let abs_start = offset + overall.start();
55 let abs_end = offset + overall.end();
56
57 let mut captures = Vec::new();
58 for i in 1..caps.len() {
59 if let Some(m) = caps.get(i) {
60 let cap_start = offset + m.start();
61 let cap_end = offset + m.end();
62 captures.push(CaptureGroup {
63 index: i,
64 name: None,
65 start: cap_start,
66 end: cap_end,
67 text: text[cap_start..cap_end].to_string(),
68 });
69 }
70 }
71
72 matches.push(Match {
73 start: abs_start,
74 end: abs_end,
75 text: text[abs_start..abs_end].to_string(),
76 captures,
77 });
78
79 if overall.start() == overall.end() {
80 offset += abs_end + 1;
81 } else {
82 offset = abs_end;
83 }
84 }
85
86 Ok(matches)
87 }
88}
89
#[cfg(test)]
mod tests {
    use super::*;

    /// Compiles `pattern` with default flags and returns all matches found in
    /// `haystack`, panicking if either step fails.
    fn search(pattern: &str, haystack: &str) -> Vec<Match> {
        let regex = Pcre2Engine
            .compile(pattern, &EngineFlags::default())
            .unwrap();
        regex.find_matches(haystack).unwrap()
    }

    /// A plain digit pattern finds both numbers in the input.
    #[test]
    fn test_simple_match() {
        let found = search(r"\d+", "abc 123 def 456");
        assert_eq!(found.len(), 2);
        assert_eq!(found[0].text, "123");
    }

    /// Backreferences are supported: a repeated word is matched once.
    #[test]
    fn test_backreference() {
        let found = search(r"(\w+) \1", "hello hello world");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].text, "hello hello");
    }

    /// Lookahead is supported and is not part of the matched text.
    #[test]
    fn test_lookahead() {
        let found = search(r"\w+(?=@)", "user@example.com");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].text, "user");
    }
}