1use regex::Regex;
7use std::collections::HashMap;
8
9#[derive(Debug)]
11pub struct PcreState {
12 pattern: Option<Regex>,
13 pattern_str: Option<String>,
14}
15
16impl Default for PcreState {
17 fn default() -> Self {
18 Self::new()
19 }
20}
21
22impl PcreState {
23 pub fn new() -> Self {
24 Self {
25 pattern: None,
26 pattern_str: None,
27 }
28 }
29
30 pub fn has_pattern(&self) -> bool {
31 self.pattern.is_some()
32 }
33
34 pub fn clear(&mut self) {
35 self.pattern = None;
36 self.pattern_str = None;
37 }
38}
39
/// Flags that influence how a pattern is compiled.
///
/// Every flag is `false` in the `Default` value.
#[derive(Clone, Debug, Default)]
pub struct PcreCompileOptions {
    /// Request that the pattern be anchored at the start of the searched text.
    pub anchored: bool,
    /// Request case-insensitive matching.
    pub caseless: bool,
    /// Request multi-line mode.
    pub multiline: bool,
    /// Request extended (whitespace-insensitive) pattern syntax.
    pub extended: bool,
    /// Request that `.` also match a newline.
    pub dotall: bool,
}
49
/// Settings supplied to a single match attempt.
///
/// The `Default` value has every name unset, `offset` equal to `0` and both
/// flags `false`.
#[derive(Clone, Debug, Default)]
pub struct PcreMatchOptions {
    /// Presumably the name of the variable that receives the whole match
    /// (TODO confirm against the caller).
    pub match_var: Option<String>,
    /// Presumably the name of the array variable that receives the captures
    /// (TODO confirm against the caller).
    pub array_var: Option<String>,
    /// Presumably the name of the associative array that receives named
    /// captures (TODO confirm against the caller).
    pub assoc_var: Option<String>,
    /// Byte offset into the subject text at which matching starts.
    pub offset: usize,
    /// Presumably asks for match offsets rather than matched text
    /// (TODO confirm against the caller).
    pub return_offsets: bool,
    /// Presumably selects the DFA matching algorithm (TODO confirm).
    pub use_dfa: bool,
}
60
/// Outcome of a single match attempt.
#[derive(Clone, Debug)]
pub struct PcreMatchResult {
    /// `true` when the pattern matched.
    pub matched: bool,
    /// Text of the whole match, when there is one.
    pub full_match: Option<String>,
    /// Text of each numbered capture group; `None` marks a group without text.
    pub captures: Vec<Option<String>>,
    /// Text of named capture groups, keyed by group name.
    pub named_captures: HashMap<String, String>,
    /// Start position of the whole match, when there is one.
    pub match_start: Option<usize>,
    /// End position of the whole match, when there is one.
    pub match_end: Option<usize>,
}

impl PcreMatchResult {
    /// Builds the result that represents "nothing matched": `matched` is
    /// `false`, every optional field is `None` and both collections are empty.
    pub fn no_match() -> Self {
        Self {
            matched: false,
            captures: vec![],
            named_captures: HashMap::default(),
            full_match: None,
            match_start: None,
            match_end: None,
        }
    }
}
84
85pub fn pcre_compile(
87 pattern: &str,
88 options: &PcreCompileOptions,
89 state: &mut PcreState,
90) -> Result<(), String> {
91 state.clear();
92
93 let mut pattern_str = String::new();
94
95 if options.caseless {
96 pattern_str.push_str("(?i)");
97 }
98 if options.multiline {
99 pattern_str.push_str("(?m)");
100 }
101 if options.dotall {
102 pattern_str.push_str("(?s)");
103 }
104 if options.extended {
105 pattern_str.push_str("(?x)");
106 }
107 if options.anchored {
108 pattern_str.push('^');
109 }
110
111 pattern_str.push_str(pattern);
112
113 match Regex::new(&pattern_str) {
114 Ok(re) => {
115 state.pattern = Some(re);
116 state.pattern_str = Some(pattern_str);
117 Ok(())
118 }
119 Err(e) => Err(format!("error in regex: {}", e)),
120 }
121}
122
123pub fn pcre_study(state: &PcreState) -> Result<(), String> {
125 if state.pattern.is_none() {
126 return Err("no pattern has been compiled for study".to_string());
127 }
128 Ok(())
129}
130
131pub fn pcre_match(
133 text: &str,
134 options: &PcreMatchOptions,
135 state: &PcreState,
136) -> Result<PcreMatchResult, String> {
137 let re = state
138 .pattern
139 .as_ref()
140 .ok_or_else(|| "no pattern has been compiled".to_string())?;
141
142 let search_text = if options.offset > 0 && options.offset < text.len() {
143 &text[options.offset..]
144 } else if options.offset >= text.len() {
145 return Ok(PcreMatchResult::no_match());
146 } else {
147 text
148 };
149
150 let caps = match re.captures(search_text) {
151 Some(c) => c,
152 None => return Ok(PcreMatchResult::no_match()),
153 };
154
155 let full_match = caps.get(0).map(|m| m.as_str().to_string());
156 let match_start = caps.get(0).map(|m| m.start() + options.offset);
157 let match_end = caps.get(0).map(|m| m.end() + options.offset);
158
159 let mut captures = Vec::new();
160 for i in 1..caps.len() {
161 captures.push(caps.get(i).map(|m| m.as_str().to_string()));
162 }
163
164 let mut named_captures = HashMap::new();
165 for name in re.capture_names().flatten() {
166 if let Some(m) = caps.name(name) {
167 named_captures.insert(name.to_string(), m.as_str().to_string());
168 }
169 }
170
171 Ok(PcreMatchResult {
172 matched: true,
173 full_match,
174 captures,
175 named_captures,
176 match_start,
177 match_end,
178 })
179}
180
181pub fn cond_pcre_match(lhs: &str, rhs: &str, caseless: bool) -> (bool, PcreMatchResult) {
183 let options = PcreCompileOptions {
184 caseless,
185 ..Default::default()
186 };
187
188 let mut state = PcreState::new();
189
190 if pcre_compile(rhs, &options, &mut state).is_err() {
191 return (false, PcreMatchResult::no_match());
192 }
193
194 let match_options = PcreMatchOptions::default();
195
196 match pcre_match(lhs, &match_options, &state) {
197 Ok(result) => (result.matched, result),
198 Err(_) => (false, PcreMatchResult::no_match()),
199 }
200}
201
202pub fn builtin_pcre_compile(
204 args: &[&str],
205 options: &PcreCompileOptions,
206 state: &mut PcreState,
207) -> (i32, String) {
208 if args.is_empty() {
209 return (1, "pcre_compile: pattern required\n".to_string());
210 }
211
212 match pcre_compile(args[0], options, state) {
213 Ok(()) => (0, String::new()),
214 Err(e) => (1, format!("pcre_compile: {}\n", e)),
215 }
216}
217
218pub fn builtin_pcre_study(state: &PcreState) -> (i32, String) {
220 match pcre_study(state) {
221 Ok(()) => (0, String::new()),
222 Err(e) => (1, format!("pcre_study: {}\n", e)),
223 }
224}
225
226pub fn builtin_pcre_match(
228 args: &[&str],
229 options: &PcreMatchOptions,
230 state: &PcreState,
231) -> (i32, PcreMatchResult) {
232 if args.is_empty() {
233 return (1, PcreMatchResult::no_match());
234 }
235
236 match pcre_match(args[0], options, state) {
237 Ok(result) => {
238 if result.matched {
239 (0, result)
240 } else {
241 (1, result)
242 }
243 }
244 Err(_) => (1, PcreMatchResult::no_match()),
245 }
246}
247
#[cfg(test)]
mod tests {
    use super::*;

    /// Compiles `pattern` into a fresh state, panicking if compilation fails.
    fn compiled(pattern: &str, options: &PcreCompileOptions) -> PcreState {
        let mut state = PcreState::new();
        pcre_compile(pattern, options, &mut state).unwrap();
        state
    }

    /// Compiles `pattern` with default options and matches it against `text`
    /// with default match options.
    fn run(pattern: &str, text: &str) -> PcreMatchResult {
        let state = compiled(pattern, &PcreCompileOptions::default());
        pcre_match(text, &PcreMatchOptions::default(), &state).unwrap()
    }

    #[test]
    fn test_pcre_state_new() {
        assert!(!PcreState::new().has_pattern());
    }

    #[test]
    fn test_pcre_compile_simple() {
        let mut state = PcreState::new();
        let outcome = pcre_compile("hello", &PcreCompileOptions::default(), &mut state);

        assert!(outcome.is_ok());
        assert!(state.has_pattern());
    }

    #[test]
    fn test_pcre_compile_invalid() {
        let mut state = PcreState::new();
        let outcome = pcre_compile("[invalid", &PcreCompileOptions::default(), &mut state);

        assert!(outcome.is_err());
    }

    #[test]
    fn test_pcre_compile_caseless() {
        let options = PcreCompileOptions {
            caseless: true,
            ..Default::default()
        };
        let mut state = PcreState::new();
        assert!(pcre_compile("hello", &options, &mut state).is_ok());

        let found = pcre_match("HELLO WORLD", &PcreMatchOptions::default(), &state).unwrap();
        assert!(found.matched);
    }

    #[test]
    fn test_pcre_study_no_pattern() {
        assert!(pcre_study(&PcreState::new()).is_err());
    }

    #[test]
    fn test_pcre_study_with_pattern() {
        let state = compiled("hello", &PcreCompileOptions::default());
        assert!(pcre_study(&state).is_ok());
    }

    #[test]
    fn test_pcre_match_simple() {
        let found = run("hello", "hello world");

        assert!(found.matched);
        assert_eq!(found.full_match, Some("hello".to_string()));
    }

    #[test]
    fn test_pcre_match_no_match() {
        assert!(!run("hello", "goodbye world").matched);
    }

    #[test]
    fn test_pcre_match_captures() {
        let found = run(r"(\w+) (\w+)", "hello world");

        assert!(found.matched);
        assert_eq!(found.captures.len(), 2);
        assert_eq!(found.captures[0], Some("hello".to_string()));
        assert_eq!(found.captures[1], Some("world".to_string()));
    }

    #[test]
    fn test_pcre_match_named_captures() {
        let found = run(r"(?P<first>\w+) (?P<second>\w+)", "hello world");

        assert!(found.matched);
        assert_eq!(
            found.named_captures.get("first"),
            Some(&"hello".to_string())
        );
        assert_eq!(
            found.named_captures.get("second"),
            Some(&"world".to_string())
        );
    }

    #[test]
    fn test_pcre_match_with_offset() {
        let state = compiled("world", &PcreCompileOptions::default());
        let from_six = PcreMatchOptions {
            offset: 6,
            ..Default::default()
        };

        let found = pcre_match("hello world", &from_six, &state).unwrap();
        assert!(found.matched);
        assert_eq!(found.match_start, Some(6));
    }

    #[test]
    fn test_cond_pcre_match() {
        // (pattern, caseless, expected outcome) against a fixed subject.
        let cases = [
            ("hello", false, true),
            ("HELLO", true, true),
            ("HELLO", false, false),
        ];

        for &(pattern, caseless, expected) in cases.iter() {
            let (matched, _) = cond_pcre_match("hello world", pattern, caseless);
            assert_eq!(matched, expected);
        }
    }

    #[test]
    fn test_builtin_pcre_compile_no_args() {
        let mut state = PcreState::new();
        let (status, _) = builtin_pcre_compile(&[], &PcreCompileOptions::default(), &mut state);

        assert_eq!(status, 1);
    }

    #[test]
    fn test_builtin_pcre_match_no_pattern() {
        let state = PcreState::new();
        let (status, _) = builtin_pcre_match(&["test"], &PcreMatchOptions::default(), &state);

        assert_eq!(status, 1);
    }
}