1use super::compiler::{Program, compile};
2use super::dfa::SimpleDFA;
3use super::engine::{Match, execute as nfa_execute};
4use super::fast_class::FastClassMatcher;
5use super::optimizer::{OptimizedPattern, analyze_pattern};
6use super::parser::parse;
7
8fn is_pure_literal(pattern: &str) -> bool {
9 for c in pattern.chars() {
10 match c {
11 '\\' | '.' | '+' | '*' | '?' | '^' | '$' | '(' | ')' | '[' | ']' | '{' | '}' | '|' => {
12 return false;
13 }
14 _ => {}
15 }
16 }
17 true
18}
19
20#[derive(Debug, Clone, Copy, PartialEq)]
21pub enum ExecMode {
22 LiteralFast,
23
24 FastClass,
25
26 LiteralDFA,
27
28 ClassDFA,
29
30 OptimizedNFA,
31
32 FullNFA,
33}
34
35#[derive(Clone)]
36pub struct FastRegex {
37 mode: ExecMode,
38
39 fast_class: Option<FastClassMatcher>,
40
41 dfa: Option<SimpleDFA>,
42
43 program: Option<Program>,
44}
45
46impl FastRegex {
47 pub fn new(pattern: &str, flags: u16) -> Result<Self, String> {
48 if let Some(fast_class) = FastClassMatcher::from_pattern(pattern) {
49 return Ok(Self {
50 mode: ExecMode::FastClass,
51 fast_class: Some(fast_class),
52 dfa: None,
53 program: None,
54 });
55 }
56
57 if is_pure_literal(pattern) {
58 if let Some(dfa) = SimpleDFA::new(pattern) {
59 return Ok(Self {
60 mode: ExecMode::LiteralFast,
61 fast_class: None,
62 dfa: Some(dfa),
63 program: None,
64 });
65 }
66 }
67
68 if let Some(dfa) = SimpleDFA::new(pattern) {
69 return Ok(Self {
70 mode: ExecMode::ClassDFA,
71 fast_class: None,
72 dfa: Some(dfa),
73 program: None,
74 });
75 }
76
77 let ast = parse(pattern, flags)?;
78
79 let optimized = analyze_pattern(&ast);
80
81 match optimized {
82 OptimizedPattern::LiteralChar(c) => {
83 let mut literal_str = String::new();
84 literal_str.push(c);
85 let dfa = SimpleDFA::new(&literal_str).ok_or("Failed to create literal DFA")?;
86 Ok(Self {
87 mode: ExecMode::LiteralFast,
88 fast_class: None,
89 dfa: Some(dfa),
90 program: None,
91 })
92 }
93 OptimizedPattern::LiteralString(s) => {
94 let dfa = SimpleDFA::new(&s).ok_or("Failed to create literal DFA")?;
95 Ok(Self {
96 mode: ExecMode::LiteralFast,
97 fast_class: None,
98 dfa: Some(dfa),
99 program: None,
100 })
101 }
102 OptimizedPattern::CharClass(_) => {
103 let program = compile(&ast, flags)?;
104 Ok(Self {
105 mode: ExecMode::FullNFA,
106 fast_class: None,
107 dfa: None,
108 program: Some(program),
109 })
110 }
111 OptimizedPattern::Simple(ast) => {
112 let program = compile(&ast, flags).ok();
113 Ok(Self {
114 mode: if program.is_some() {
115 ExecMode::OptimizedNFA
116 } else {
117 ExecMode::FullNFA
118 },
119 fast_class: None,
120 dfa: None,
121 program,
122 })
123 }
124 OptimizedPattern::Complex(ast) => {
125 let program = compile(&ast, flags)?;
126 Ok(Self {
127 mode: ExecMode::FullNFA,
128 fast_class: None,
129 dfa: None,
130 program: Some(program),
131 })
132 }
133 }
134 }
135
136 pub fn find(&self, input: &str) -> Option<Match> {
137 match self.mode {
138 ExecMode::LiteralFast => self.dfa.as_ref().and_then(|dfa| dfa.find(input)),
139 ExecMode::FastClass => {
140 if let Some(ref fast_class) = self.fast_class {
141 fast_class.find(input)
142 } else {
143 None
144 }
145 }
146 ExecMode::LiteralDFA | ExecMode::ClassDFA => {
147 if let Some(ref dfa) = self.dfa {
148 dfa.find(input)
149 } else {
150 None
151 }
152 }
153 ExecMode::OptimizedNFA => {
154 if let Some(ref dfa) = self.dfa {
155 if let Some(m) = dfa.find(input) {
156 return Some(m);
157 }
158 }
159
160 if let Some(ref program) = self.program {
161 nfa_execute(program, input, 0)
162 } else {
163 None
164 }
165 }
166 ExecMode::FullNFA => {
167 if let Some(ref program) = self.program {
168 nfa_execute(program, input, 0)
169 } else {
170 None
171 }
172 }
173 }
174 }
175
176 #[inline(always)]
177 pub fn is_match(&self, input: &str) -> bool {
178 match self.mode {
179 ExecMode::LiteralFast => self.dfa.as_ref().map_or(false, |dfa| dfa.is_match(input)),
180 ExecMode::FastClass => {
181 if let Some(ref fast_class) = self.fast_class {
182 fast_class.is_match(input)
183 } else {
184 false
185 }
186 }
187 ExecMode::LiteralDFA | ExecMode::ClassDFA => {
188 if let Some(ref dfa) = self.dfa {
189 dfa.is_match(input)
190 } else {
191 false
192 }
193 }
194 ExecMode::OptimizedNFA | ExecMode::FullNFA => self.find(input).is_some(),
195 }
196 }
197
198 pub fn find_all(&self, input: &str) -> Vec<Match> {
199 if let Some(ref dfa) = self.dfa {
200 return dfa.find_all(input);
201 }
202
203 if let Some(ref fast_class) = self.fast_class {
204 return fast_class.find_all(input);
205 }
206
207 let mut matches = Vec::new();
208 let mut pos = 0;
209
210 while pos <= input.len() {
211 if let Some(m) = self.find_from(input, pos) {
212 let match_end = m.end;
213 matches.push(m);
214
215 if match_end <= pos {
216 pos += 1;
217 } else {
218 pos = match_end;
219 }
220 } else {
221 break;
222 }
223 }
224
225 matches
226 }
227
228 fn find_from(&self, input: &str, start: usize) -> Option<Match> {
229 if start >= input.len() {
230 return None;
231 }
232
233 let slice = &input[start..];
234
235 match self.mode {
236 ExecMode::LiteralDFA | ExecMode::ClassDFA => {
237 if let Some(ref dfa) = self.dfa {
238 dfa.find(slice).map(|m| Match {
239 start: m.start + start,
240 end: m.end + start,
241 captures: m
242 .captures
243 .iter()
244 .map(|(s, e)| (s.map(|x| x + start), e.map(|x| x + start)))
245 .collect(),
246 })
247 } else {
248 None
249 }
250 }
251 _ => {
252 if let Some(ref program) = self.program {
253 nfa_execute(program, input, start)
254 } else {
255 None
256 }
257 }
258 }
259 }
260
261 pub fn mode(&self) -> ExecMode {
262 self.mode
263 }
264
265 pub fn dfa(&self) -> Option<&SimpleDFA> {
266 self.dfa.as_ref()
267 }
268}
269
270#[cfg(test)]
271mod tests {
272 use super::*;
273
274 #[test]
275 fn test_fast_literal() {
276 let re = FastRegex::new("hello", 0).unwrap();
277
278 assert_eq!(re.mode(), ExecMode::LiteralFast);
279 assert!(re.is_match("hello world"));
280 assert!(!re.is_match("goodbye"));
281 }
282
283 #[test]
284 fn test_fast_literal_find() {
285 let re = FastRegex::new("world", 0).unwrap();
286 let m = re.find("hello world").unwrap();
287 assert_eq!(m.start, 6);
288 assert_eq!(m.end, 11);
289 }
290
291 #[test]
292 fn test_fast_char_class() {
293 let re = FastRegex::new(r"\w+", 0).unwrap();
294 assert_eq!(
295 re.mode(),
296 ExecMode::FastClass,
297 "\\w+ should use FastClass mode"
298 );
299
300 let m = re.find("hello world").unwrap();
301 assert_eq!(m.start, 0);
302 assert_eq!(m.end, 5);
303
304 let re2 = FastRegex::new(r"\d+", 0).unwrap();
305 assert_eq!(
306 re2.mode(),
307 ExecMode::FastClass,
308 "\\d+ should use FastClass mode"
309 );
310
311 let m2 = re2.find("abc123def").unwrap();
312 assert_eq!(m2.start, 3);
313 assert_eq!(m2.end, 6);
314
315 let re3 = FastRegex::new(r"\s+", 0).unwrap();
316 assert_eq!(
317 re3.mode(),
318 ExecMode::FastClass,
319 "\\s+ should use FastClass mode"
320 );
321
322 let m3 = re3.find("hello world").unwrap();
323 assert_eq!(m3.start, 5);
324 assert_eq!(m3.end, 8);
325 }
326
327 #[test]
328 fn test_find_all() {
329 let re = FastRegex::new("a", 0).unwrap();
330 let matches = re.find_all("banana");
331 assert_eq!(matches.len(), 3);
332 }
333
334 #[test]
335 fn test_is_match_fast() {
336 let re = FastRegex::new(r"\d+", 0).unwrap();
337 assert!(re.is_match("abc123def"));
338 assert!(!re.is_match("abcdef"));
339 }
340
341 #[test]
342 fn test_fast_literal_is_match() {
343 let re = FastRegex::new("hello", 0).unwrap();
344 assert!(re.is_match("hello world"));
345 assert!(re.is_match("say hello"));
346 assert!(!re.is_match("goodbye"));
347 }
348
349 #[test]
350 fn test_fast_empty_pattern() {
351 let re = FastRegex::new("", 0).unwrap();
352 assert!(re.is_match("anything"));
353 assert!(re.is_match(""));
354 }
355
356 #[test]
357 fn test_fast_single_char() {
358 let re = FastRegex::new("x", 0).unwrap();
359 assert!(re.is_match("xyz"));
360 assert!(re.is_match("abc x def"));
361 assert!(!re.is_match("abc"));
362 }
363
364 #[test]
365 fn test_fast_anchors() {
366 let re_start = FastRegex::new("^hello", 0).unwrap();
367 assert!(re_start.is_match("hello world"));
368 assert!(!re_start.is_match("say hello"));
369
370 let re_end = FastRegex::new("world$", 0).unwrap();
371 assert!(re_end.is_match("hello world"));
372 assert!(!re_end.is_match("worldly"));
373 }
374
375 #[test]
376 fn test_fast_star_plus() {
377 let re_star = FastRegex::new("a*", 0).unwrap();
378 assert!(re_star.is_match(""));
379 assert!(re_star.is_match("a"));
380 assert!(re_star.is_match("aaa"));
381 assert!(re_star.is_match("baaa"));
382
383 let re_plus = FastRegex::new("a+", 0).unwrap();
384 assert!(!re_plus.is_match(""));
385 assert!(!re_plus.is_match("bbb"));
386 assert!(re_plus.is_match("a"));
387 assert!(re_plus.is_match("aaa"));
388 }
389
390 #[test]
391 fn test_find_all_words() {
392 let re = FastRegex::new(r"\w+", 0).unwrap();
393 let matches = re.find_all("hello world test");
394 assert_eq!(matches.len(), 3);
395 assert_eq!(matches[0].start, 0);
396 assert_eq!(matches[0].end, 5);
397 assert_eq!(matches[1].start, 6);
398 assert_eq!(matches[1].end, 11);
399 assert_eq!(matches[2].start, 12);
400 assert_eq!(matches[2].end, 16);
401 }
402
403 #[test]
404 fn test_find_all_digits() {
405 let re = FastRegex::new(r"\d+", 0).unwrap();
406 let matches = re.find_all("a1b22c333d");
407 assert_eq!(matches.len(), 3);
408 assert_eq!(matches[0].as_str("a1b22c333d"), "1");
409 assert_eq!(matches[1].as_str("a1b22c333d"), "22");
410 assert_eq!(matches[2].as_str("a1b22c333d"), "333");
411 }
412
413 #[test]
414 fn test_find_all_no_matches() {
415 let re = FastRegex::new("xyz", 0).unwrap();
416 let matches = re.find_all("abc");
417 assert!(matches.is_empty());
418 }
419
420 #[test]
421 fn test_fast_unicode() {
422 let re = FastRegex::new("hello", 0).unwrap();
423 assert!(re.is_match("你好 hello 世界"));
424
425 let m = re.find("你好 hello 世界").unwrap();
426 assert_eq!(m.as_str("你好 hello 世界"), "hello");
427 }
428
429 #[test]
430 fn test_fast_pattern_with_special_chars() {
431 let re_dot = FastRegex::new("a.b", 0).unwrap();
432 assert!(re_dot.is_match("axb"));
433 assert!(!re_dot.is_match("ab"));
434
435 let re_alt = FastRegex::new("a|b", 0).unwrap();
436 assert!(re_alt.is_match("a"));
437 assert!(re_alt.is_match("b"));
438 assert!(!re_alt.is_match("c"));
439 }
440
441 #[test]
442 fn test_exec_mode_display() {
443 assert_eq!(format!("{:?}", ExecMode::LiteralFast), "LiteralFast");
444 assert_eq!(format!("{:?}", ExecMode::FastClass), "FastClass");
445 assert_eq!(format!("{:?}", ExecMode::ClassDFA), "ClassDFA");
446 }
447}