1use std::collections::BTreeSet as Set;
2use finite_automata::{
3 Enfa,
4 Nfa,
5 Dfa,
6};
7use regular_expression_bootstrap::Re as ReBootstrap;
8use crate::{
9 grammar::{
10 LEXER_PRODUCTIONS,
11 PARSER_PRODUCTIONS,
12 Nonterminal,
13 as_expression,
14 },
15 Expression,
16};
17use simple_lexer_bootstrap::Lexer;
18use simple_parser_bootstrap::Parser;
19
20type Result<T> = std::result::Result<T, &'static str>;
21
22pub struct Re {
23 re: ReBootstrap,
24}
25
26impl Re {
27 pub fn new(expression: &str) -> Result<Re> {
28 let lexer = Lexer::new(LEXER_PRODUCTIONS.clone());
29 let parser = Parser::new(PARSER_PRODUCTIONS.clone(), Nonterminal::Root);
30 let tokens = lexer.lex(expression)?;
31 let parse_tree = parser.parse(&tokens)?;
32 let expression = as_expression(&parse_tree)?;
33 Ok(Re { re: ReBootstrap::new(expression) })
34 }
35
36 pub fn is_match(&self, text: &str) -> bool {
37 self.re.is_match(text)
38 }
39
40 pub fn into_expression(self) -> Expression {
41 self.re.into_expression()
42 }
43
44 pub fn as_expression(&self) -> &Expression {
45 self.re.as_expression()
46 }
47}
48
49impl From<Re> for Expression {
50 fn from(re: Re) -> Expression {
51 re.into_expression()
52 }
53}
54
55impl From<&Re> for Enfa<u32, u32> {
56 fn from(re: &Re) -> Enfa<u32, u32> {
57 Enfa::from(re.as_expression())
58 }
59}
60
61impl From<&Re> for Nfa<Set<u32>, u32> {
62 fn from(re: &Re) -> Nfa<Set<u32>, u32> {
63 Nfa::from(&Enfa::from(re))
64 }
65}
66
67impl From<&Re> for Dfa<Set<u32>, u32> {
68 fn from(re: &Re) -> Dfa<Set<u32>, u32> {
69 Dfa::from(&Enfa::from(re))
70 }
71}
72
73#[cfg(test)]
74mod tests {
75 use std::{
76 collections::BTreeSet as Set,
77 fmt::Debug,
78 u32,
79 };
80 use segment_map::Segment;
81 use finite_automata::{
82 Enfa,
83 Dfa,
84 };
85 use crate::Re;
86 use super::Result;
87
88 #[test]
89 fn test_enfa_epsilon() -> Result<()> {
90 let expected = ExpectedEnfa::<_, u32> {
91 initial: 0,
92 transitions: set![
93 (0, Segment::empty(), 1)
94 ],
95 finals: set![1]
96 };
97 let actual = Enfa::from(&Re::new(r"[]")?);
98 assert_enfa_eq(expected, actual);
99 Ok(())
100 }
101
102 #[test]
103 fn test_enfa_symbol() -> Result<()> {
104 let expected = ExpectedEnfa {
105 initial: 0,
106 transitions: set![
107 (0, Segment::singleton(u32::from('A')), 1)
108 ],
109 finals: set![1]
110 };
111 let actual = Enfa::from(&Re::new(r"A")?);
112 assert_enfa_eq(expected, actual);
113 Ok(())
114 }
115
116 #[test]
117 fn test_enfa_negated_symbol() -> Result<()> {
118 let expected = ExpectedEnfa {
119 initial: 0,
120 transitions: set![
121 (0, Segment::less_than(u32::from('A')), 1),
122 (0, Segment::greater_than(u32::from('A')), 1)
123 ],
124 finals: set![1]
125 };
126 let actual = Enfa::from(&Re::new(r"[^A]")?);
127 assert_enfa_eq(expected, actual);
128 Ok(())
129 }
130
131 #[test]
132 fn test_enfa_symbol_set() -> Result<()> {
133 let expected = ExpectedEnfa {
134 initial: 0,
135 transitions: set![
136 (0, Segment::closed(u32::from('A'), u32::from('Z')), 1),
137 (0, Segment::closed(u32::from('a'), u32::from('z')), 1)
138 ],
139 finals: set![1]
140 };
141 let actual = Enfa::from(&Re::new(r"[A-Za-z]")?);
142 assert_enfa_eq(expected, actual);
143 Ok(())
144 }
145
146 #[test]
147 fn test_enfa_negated_symbol_set() -> Result<()> {
148 let expected = ExpectedEnfa {
149 initial: 0,
150 transitions: set![
151 (0, Segment::less_than(u32::from('A')), 1),
152 (0, Segment::open(u32::from('Z'), u32::from('a')), 1),
153 (0, Segment::greater_than(u32::from('z')), 1)
154 ],
155 finals: set![1]
156 };
157 let actual = Enfa::from(&Re::new(r"[^A-Za-z]")?);
158 assert_enfa_eq(expected, actual);
159 Ok(())
160 }
161
162 #[test]
163 fn test_enfa_alternation() -> Result<()> {
164 let expected = ExpectedEnfa {
165 initial: 0,
166 transitions: set![
167 (0, Segment::empty(), 2),
168 (0, Segment::empty(), 4),
169 (2, Segment::singleton(u32::from('A')), 3),
170 (3, Segment::empty(), 1),
171 (4, Segment::singleton(u32::from('B')), 5),
172 (5, Segment::empty(), 1)
173 ],
174 finals: set![1]
175 };
176 let actual = Enfa::from(&Re::new("A|B")?);
177 assert_enfa_eq(expected, actual);
178 Ok(())
179 }
180
181 #[test]
182 fn test_enfa_concatenation() -> Result<()> {
183 let expected = ExpectedEnfa {
184 initial: 0,
185 transitions: set![
186 (0, Segment::empty(), 2),
187 (2, Segment::singleton(u32::from('A')), 3),
188 (3, Segment::empty(), 4),
189 (4, Segment::singleton(u32::from('B')), 5),
190 (5, Segment::empty(), 1)
191 ],
192 finals: set![1]
193 };
194 let actual = Enfa::from(&Re::new(r"AB")?);
195 assert_enfa_eq(expected, actual);
196 Ok(())
197 }
198
199 #[test]
200 fn test_enfa_repetition() -> Result<()> {
201 let expected = ExpectedEnfa {
202 initial: 0,
203 transitions: set![
204 (0, Segment::empty(), 2),
205 (0, Segment::empty(), 1),
206 (2, Segment::singleton(u32::from('A')), 3),
207 (3, Segment::empty(), 1),
208 (3, Segment::empty(), 2)
209 ],
210 finals: set![1]
211 };
212 let actual = Enfa::from(&Re::new(r"A*")?);
213 assert_enfa_eq(expected, actual);
214 Ok(())
215 }
216
217 #[test]
218 fn test_dfa_epsilon() -> Result<()> {
219 let expected = ExpectedDfa {
220 initial: set![0, 1],
221 transitions: set![],
222 finals: set![set![0, 1]]
223 };
224 let actual = Dfa::from(&Re::new(r"[]")?);
225 assert_dfa_eq(expected, actual);
226 Ok(())
227 }
228
229 #[test]
230 fn test_dfa_symbol() -> Result<()> {
231 let expected = ExpectedDfa {
232 initial: set![0],
233 transitions: set![
234 (set![0], Segment::singleton(u32::from('A')), set![1])
235 ],
236 finals: set![set![1]]
237 };
238 let actual = Dfa::from(&Re::new(r"A")?);
239 assert_dfa_eq(expected, actual);
240 Ok(())
241 }
242
243 #[test]
244 fn test_dfa_negated_symbol() -> Result<()> {
245 let expected = ExpectedDfa {
246 initial: set![0],
247 transitions: set![
248 (set![0], Segment::less_than(u32::from('A')), set![1]),
249 (set![0], Segment::greater_than(u32::from('A')), set![1])
250 ],
251 finals: set![set![1]]
252 };
253 let actual = Dfa::from(&Re::new(r"[^A]")?);
254 assert_dfa_eq(expected, actual);
255 Ok(())
256 }
257
258 #[test]
259 fn test_dfa_symbol_set() -> Result<()> {
260 let expected = ExpectedDfa {
261 initial: set![0],
262 transitions: set![
263 (set![0], Segment::closed(u32::from('A'), u32::from('Z')), set![1]),
264 (set![0], Segment::closed(u32::from('a'), u32::from('z')), set![1])
265 ],
266 finals: set![set![1]]
267 };
268 let actual = Dfa::from(&Re::new(r"[A-Za-z]")?);
269 assert_dfa_eq(expected, actual);
270 Ok(())
271 }
272
273 #[test]
274 fn test_dfa_negated_symbol_set() -> Result<()> {
275 let expected = ExpectedDfa {
276 initial: set![0],
277 transitions: set![
278 (set![0], Segment::less_than(u32::from('A')), set![1]),
279 (set![0], Segment::open(u32::from('Z'), u32::from('a')), set![1]),
280 (set![0], Segment::greater_than(u32::from('z')), set![1])
281 ],
282 finals: set![set![1]]
283 };
284 let actual = Dfa::from(&Re::new(r"[^A-Za-z]")?);
285 assert_dfa_eq(expected, actual);
286 Ok(())
287 }
288
289 #[test]
290 fn test_dfa_alternation() -> Result<()> {
291 let expected = ExpectedDfa {
292 initial: set![0, 2, 4],
293 transitions: set![
294 (set![0, 2, 4], Segment::singleton(u32::from('A')), set![1, 3]),
295 (set![0, 2, 4], Segment::singleton(u32::from('B')), set![1, 5])
296 ],
297 finals: set![set![1, 3], set![1, 5]]
298 };
299 let actual = Dfa::from(&Re::new(r"A|B")?);
300 assert_dfa_eq(expected, actual);
301 Ok(())
302 }
303
304 #[test]
305 fn test_dfa_concatenation() -> Result<()> {
306 let expected = ExpectedDfa {
307 initial: set![0, 2],
308 transitions: set![
309 (set![0, 2], Segment::singleton(u32::from('A')), set![3, 4]),
310 (set![3, 4], Segment::singleton(u32::from('B')), set![1, 5])
311 ],
312 finals: set![set![1, 5]]
313 };
314 let actual = Dfa::from(&Re::new(r"AB")?);
315 assert_dfa_eq(expected, actual);
316 Ok(())
317 }
318
319 #[test]
320 fn test_dfa_repetition() -> Result<()> {
321 let expected = ExpectedDfa {
322 initial: set![0, 1, 2],
323 transitions: set![
324 (set![0, 1, 2], Segment::singleton(u32::from('A')), set![1, 2, 3]),
325 (set![1, 2, 3], Segment::singleton(u32::from('A')), set![1, 2, 3])
326 ],
327 finals: set![set![0, 1, 2], set![1, 2, 3]]
328 };
329 let actual = Dfa::from(&Re::new(r"A*")?);
330 assert_dfa_eq(expected, actual);
331 Ok(())
332 }
333
334 #[test]
335 fn test_match_epsilon_1() -> Result<()> {
336 let re = Re::new(r"[]")?;
337 assert!(re.is_match(""));
338 Ok(())
339 }
340
341 #[test]
342 fn test_match_epsilon_2() -> Result<()> {
343 let re = Re::new(r"[]")?;
344 assert!(!re.is_match("A"));
345 Ok(())
346 }
347
348 #[test]
349 fn test_match_symbol_1() -> Result<()> {
350 let re = Re::new(r"A")?;
351 assert!(re.is_match("A"));
352 Ok(())
353 }
354
355 #[test]
356 fn test_match_symbol_2() -> Result<()> {
357 let re = Re::new(r"A")?;
358 assert!(!re.is_match("B"));
359 Ok(())
360 }
361
362 #[test]
363 fn test_match_negated_symbol_1() -> Result<()> {
364 let re = Re::new(r"[^A]")?;
365 assert!(re.is_match("B"));
366 Ok(())
367 }
368
369 #[test]
370 fn test_match_negated_symbol_2() -> Result<()> {
371 let re = Re::new(r"[^A]")?;
372 assert!(!re.is_match("A"));
373 Ok(())
374 }
375
376 #[test]
377 fn test_match_symbol_set_1() -> Result<()> {
378 let re = Re::new(r"[A-Za-z]")?;
379 assert!(re.is_match("D"));
380 Ok(())
381 }
382
383 #[test]
384 fn test_match_symbol_set_2() -> Result<()> {
385 let re = Re::new(r"[A-Za-z]")?;
386 assert!(!re.is_match("_"));
387 Ok(())
388 }
389
390 #[test]
391 fn test_match_negated_symbol_set_1() -> Result<()> {
392 let re = Re::new(r"[^A-Za-z]")?;
393 assert!(re.is_match("_"));
394 Ok(())
395 }
396
397 #[test]
398 fn test_match_negated_symbol_set_2() -> Result<()> {
399 let re = Re::new(r"[^A-Za-z]")?;
400 assert!(!re.is_match("D"));
401 Ok(())
402 }
403
404 #[test]
405 fn test_match_alternation_1() -> Result<()> {
406 let re = Re::new(r"A|B")?;
407 assert!(re.is_match("A"));
408 Ok(())
409 }
410
411 #[test]
412 fn test_match_alternation_2() -> Result<()> {
413 let re = Re::new(r"A|B")?;
414 assert!(re.is_match("B"));
415 Ok(())
416 }
417
418 #[test]
419 fn test_match_alternation_3() -> Result<()> {
420 let re = Re::new(r"A|B")?;
421 assert!(!re.is_match("C"));
422 Ok(())
423 }
424
425 #[test]
426 fn test_match_concatenation_1() -> Result<()> {
427 let re = Re::new(r"AB")?;
428 assert!(re.is_match("AB"));
429 Ok(())
430 }
431
432 #[test]
433 fn test_match_concatenation_2() -> Result<()> {
434 let re = Re::new(r"AB")?;
435 assert!(!re.is_match("AA"));
436 Ok(())
437 }
438
439 #[test]
440 fn test_match_repetition_1() -> Result<()> {
441 let re = Re::new(r"A*")?;
442 assert!(re.is_match("AAAAAAAAA"));
443 Ok(())
444 }
445
446 #[test]
447 fn test_match_repetition_2() -> Result<()> {
448 let re = Re::new(r"A*")?;
449 assert!(!re.is_match("AAAABAAAA"));
450 Ok(())
451 }
452
453 #[test]
454 fn test_match_repetition_3() -> Result<()> {
455 let re = Re::new(r"A*")?;
456 assert!(re.is_match(""));
457 Ok(())
458 }
459
460 #[test]
461 fn test_match_repetition_4() -> Result<()> {
462 let re = Re::new(r"A+")?;
463 assert!(re.is_match("AAAAAAAAAA"));
464 Ok(())
465 }
466
467 #[test]
468 fn test_match_repetition_5() -> Result<()> {
469 let re = Re::new(r"A+")?;
470 assert!(!re.is_match("AAAABAAAAA"));
471 Ok(())
472 }
473
474 #[test]
475 fn test_match_repetition_6() -> Result<()> {
476 let re = Re::new(r"A+")?;
477 assert!(!re.is_match(""));
478 Ok(())
479 }
480
481 #[test]
482 fn test_match_repetition_7() -> Result<()> {
483 let re = Re::new(r"A?")?;
484 assert!(re.is_match("A"));
485 Ok(())
486 }
487
488 #[test]
489 fn test_match_repetition_8() -> Result<()> {
490 let re = Re::new(r"A?")?;
491 assert!(!re.is_match("B"));
492 Ok(())
493 }
494
495 #[test]
496 fn test_match_repetition_9() -> Result<()> {
497 let re = Re::new(r"A?")?;
498 assert!(re.is_match(""));
499 Ok(())
500 }
501
502 #[test]
503 fn test_match_repetition_10() -> Result<()> {
504 let re = Re::new(r"A{,3}")?;
505 assert!(re.is_match(""));
506 Ok(())
507 }
508
509 #[test]
510 fn test_match_repetition_11() -> Result<()> {
511 let re = Re::new(r"A{,3}")?;
512 assert!(re.is_match("AA"));
513 Ok(())
514 }
515
516 #[test]
517 fn test_match_repetition_12() -> Result<()> {
518 let re = Re::new(r"A{,3}")?;
519 assert!(re.is_match("AAA"));
520 Ok(())
521 }
522
523 #[test]
524 fn test_match_repetition_13() -> Result<()> {
525 let re = Re::new(r"A{,3}")?;
526 assert!(!re.is_match("AAAA"));
527 Ok(())
528 }
529
530 #[test]
531 fn test_match_repetition_14() -> Result<()> {
532 let re = Re::new(r"A{3,}")?;
533 assert!(!re.is_match(""));
534 Ok(())
535 }
536
537 #[test]
538 fn test_match_repetition_15() -> Result<()> {
539 let re = Re::new(r"A{3,}")?;
540 assert!(!re.is_match("AA"));
541 Ok(())
542 }
543
544 #[test]
545 fn test_match_repetition_16() -> Result<()> {
546 let re = Re::new(r"A{3,}")?;
547 assert!(re.is_match("AAA"));
548 Ok(())
549 }
550
551 #[test]
552 fn test_match_repetition_17() -> Result<()> {
553 let re = Re::new(r"A{3,}")?;
554 assert!(re.is_match("AAAA"));
555 Ok(())
556 }
557
558 #[test]
559 fn test_match_repetition_18() -> Result<()> {
560 let re = Re::new(r"A{3}")?;
561 assert!(!re.is_match(""));
562 Ok(())
563 }
564
565 #[test]
566 fn test_match_repetition_19() -> Result<()> {
567 let re = Re::new(r"A{3}")?;
568 assert!(!re.is_match("AA"));
569 Ok(())
570 }
571
572 #[test]
573 fn test_match_repetition_20() -> Result<()> {
574 let re = Re::new(r"A{3}")?;
575 assert!(re.is_match("AAA"));
576 Ok(())
577 }
578
579 #[test]
580 fn test_match_repetition_21() -> Result<()> {
581 let re = Re::new(r"A{3}")?;
582 assert!(!re.is_match("AAAA"));
583 Ok(())
584 }
585
586 #[test]
587 fn test_match_repetition_22() -> Result<()> {
588 let re = Re::new(r"A{2,3}")?;
589 assert!(!re.is_match(""));
590 Ok(())
591 }
592
593 #[test]
594 fn test_match_repetition_23() -> Result<()> {
595 let re = Re::new(r"A{2,3}")?;
596 assert!(re.is_match("AA"));
597 Ok(())
598 }
599
600 #[test]
601 fn test_match_repetition_24() -> Result<()> {
602 let re = Re::new(r"A{2,3}")?;
603 assert!(re.is_match("AAA"));
604 Ok(())
605 }
606
607 #[test]
608 fn test_match_repetition_25() -> Result<()> {
609 let re = Re::new(r"A{2,3}")?;
610 assert!(!re.is_match("AAAA"));
611 Ok(())
612 }
613
614 #[test]
615 fn test_match_complex_1() -> Result<()> {
616 let re = Re::new(r"(A|B)*(AAA|BBB)(A|B)*")?;
617 assert!(re.is_match("ABBBA"));
618 Ok(())
619 }
620
621 #[test]
622 fn test_match_complex_2() -> Result<()> {
623 let re = Re::new(r"(A|B)*(AAA|BBB)(A|B)*")?;
624 assert!(!re.is_match("ABBA"));
625 Ok(())
626 }
627
628 #[test]
629 fn test_match_complex_3() -> Result<()> {
630 let re = Re::new(r"(A|B)*(AAA|BBB)(A|B)*")?;
631 assert!(re.is_match("ABBAAA"));
632 Ok(())
633 }
634
635 #[test]
636 fn test_match_all() -> Result<()> {
637 let re = Re::new(r".")?;
638 assert!(re.is_match("A"));
639 Ok(())
640 }
641
642 struct ExpectedEnfa<S, T> {
643 initial: S,
644 transitions: Set<(S, Segment<T>, S)>,
645 finals: Set<S>,
646 }
647
648 fn assert_enfa_eq<S: Clone + Debug + Ord, T: Clone + Debug + Ord>(expected: ExpectedEnfa<S, T>, actual: Enfa<S, T>) {
649 assert_eq!(expected.initial, actual.states_index(actual.initial_index()).clone());
650 assert_eq!(expected.transitions, actual.transitions_slice(actual.transition_indices()).map(|(source, transition, target)| (actual.states_index(source).clone(), transition.clone(), actual.states_index(target).clone())).collect());
651 assert_eq!(expected.finals, actual.states_slice(actual.final_indices()).cloned().collect());
652 }
653
654 struct ExpectedDfa<S, T> {
655 initial: S,
656 transitions: Set<(S, Segment<T>, S)>,
657 finals: Set<S>,
658 }
659
660 fn assert_dfa_eq<S: Clone + Debug + Ord, T: Clone + Debug + Ord>(expected: ExpectedDfa<Set<S>, T>, actual: Dfa<Set<S>, T>) {
661 assert_eq!(expected.initial, actual.states_index(actual.initial_index()).clone());
662 assert_eq!(expected.transitions, actual.transitions_slice(actual.transition_indices()).map(|(source, transition, target)| (actual.states_index(source).clone(), transition.clone(), actual.states_index(target).clone())).collect());
663 assert_eq!(expected.finals, actual.states_slice(actual.final_indices()).cloned().collect());
664 }
665}