regular_expression/
re.rs

1use std::collections::BTreeSet as Set;
2use finite_automata::{
3    Enfa,
4    Nfa,
5    Dfa,
6};
7use regular_expression_bootstrap::Re as ReBootstrap;
8use crate::{
9    grammar::{
10        LEXER_PRODUCTIONS,
11        PARSER_PRODUCTIONS,
12        Nonterminal,
13        as_expression,
14    },
15    Expression,
16};
17use simple_lexer_bootstrap::Lexer;
18use simple_parser_bootstrap::Parser;
19
20type Result<T> = std::result::Result<T, &'static str>;
21
22pub struct Re {
23    re: ReBootstrap,
24}
25
26impl Re {
27    pub fn new(expression: &str) -> Result<Re> {
28        let lexer = Lexer::new(LEXER_PRODUCTIONS.clone());
29        let parser = Parser::new(PARSER_PRODUCTIONS.clone(), Nonterminal::Root);
30        let tokens = lexer.lex(expression)?;
31        let parse_tree = parser.parse(&tokens)?;
32        let expression = as_expression(&parse_tree)?;
33        Ok(Re { re: ReBootstrap::new(expression) })
34    }
35
36    pub fn is_match(&self, text: &str) -> bool {
37        self.re.is_match(text)
38    }
39
40    pub fn into_expression(self) -> Expression {
41        self.re.into_expression()
42    }
43
44    pub fn as_expression(&self) -> &Expression {
45        self.re.as_expression()
46    }
47}
48
49impl From<Re> for Expression {
50    fn from(re: Re) -> Expression {
51        re.into_expression()
52    }
53}
54
55impl From<&Re> for Enfa<u32, u32> {
56    fn from(re: &Re) -> Enfa<u32, u32> {
57        Enfa::from(re.as_expression())
58    }
59}
60
61impl From<&Re> for Nfa<Set<u32>, u32> {
62    fn from(re: &Re) -> Nfa<Set<u32>, u32> {
63        Nfa::from(&Enfa::from(re))
64    }
65}
66
67impl From<&Re> for Dfa<Set<u32>, u32> {
68    fn from(re: &Re) -> Dfa<Set<u32>, u32> {
69        Dfa::from(&Enfa::from(re))
70    }
71}
72
73#[cfg(test)]
74mod tests {
75    use std::{
76        collections::BTreeSet as Set,
77        fmt::Debug,
78        u32,
79    };
80    use segment_map::Segment;
81    use finite_automata::{
82        Enfa,
83        Dfa,
84    };
85    use crate::Re;
86    use super::Result;
87
88    #[test]
89    fn test_enfa_epsilon() -> Result<()> {
90         let expected = ExpectedEnfa::<_, u32> {
91            initial: 0,
92            transitions: set![
93                (0, Segment::empty(), 1)
94            ],
95            finals: set![1]
96        };
97        let actual = Enfa::from(&Re::new(r"[]")?);
98        assert_enfa_eq(expected, actual);
99        Ok(())
100    }
101
102    #[test]
103    fn test_enfa_symbol() -> Result<()> {
104        let expected = ExpectedEnfa {
105            initial: 0,
106            transitions: set![
107                (0, Segment::singleton(u32::from('A')), 1)
108            ],
109            finals: set![1]
110        };
111        let actual = Enfa::from(&Re::new(r"A")?);
112        assert_enfa_eq(expected, actual);
113        Ok(())
114    }
115
116    #[test]
117    fn test_enfa_negated_symbol() -> Result<()> {
118        let expected = ExpectedEnfa {
119            initial: 0,
120            transitions: set![
121                (0, Segment::less_than(u32::from('A')), 1),
122                (0, Segment::greater_than(u32::from('A')), 1)
123            ],
124            finals: set![1]
125        };
126        let actual = Enfa::from(&Re::new(r"[^A]")?);
127        assert_enfa_eq(expected, actual);
128        Ok(())
129    }
130
131    #[test]
132    fn test_enfa_symbol_set() -> Result<()> {
133        let expected = ExpectedEnfa {
134            initial: 0,
135            transitions: set![
136                (0, Segment::closed(u32::from('A'), u32::from('Z')), 1),
137                (0, Segment::closed(u32::from('a'), u32::from('z')), 1)
138            ],
139            finals: set![1]
140        };
141        let actual = Enfa::from(&Re::new(r"[A-Za-z]")?);
142        assert_enfa_eq(expected, actual);
143        Ok(())
144    }
145
146    #[test]
147    fn test_enfa_negated_symbol_set() -> Result<()> {
148        let expected = ExpectedEnfa {
149            initial: 0,
150            transitions: set![
151                (0, Segment::less_than(u32::from('A')), 1),
152                (0, Segment::open(u32::from('Z'), u32::from('a')), 1),
153                (0, Segment::greater_than(u32::from('z')), 1)
154            ],
155            finals: set![1]
156        };
157        let actual = Enfa::from(&Re::new(r"[^A-Za-z]")?);
158        assert_enfa_eq(expected, actual);
159        Ok(())
160    }
161
162    #[test]
163    fn test_enfa_alternation() -> Result<()> {
164        let expected = ExpectedEnfa {
165            initial: 0,
166            transitions: set![
167                (0, Segment::empty(), 2),
168                (0, Segment::empty(), 4),
169                (2, Segment::singleton(u32::from('A')), 3),
170                (3, Segment::empty(), 1),
171                (4, Segment::singleton(u32::from('B')), 5),
172                (5, Segment::empty(), 1)
173            ],
174            finals: set![1]
175        };
176        let actual = Enfa::from(&Re::new("A|B")?);
177        assert_enfa_eq(expected, actual);
178        Ok(())
179    }
180
181    #[test]
182    fn test_enfa_concatenation() -> Result<()> {
183        let expected = ExpectedEnfa {
184            initial: 0,
185            transitions: set![
186                (0, Segment::empty(), 2),
187                (2, Segment::singleton(u32::from('A')), 3),
188                (3, Segment::empty(), 4),
189                (4, Segment::singleton(u32::from('B')), 5),
190                (5, Segment::empty(), 1)
191            ],
192            finals: set![1]
193        };
194        let actual = Enfa::from(&Re::new(r"AB")?);
195        assert_enfa_eq(expected, actual);
196        Ok(())
197    }
198
199    #[test]
200    fn test_enfa_repetition() -> Result<()> {
201        let expected = ExpectedEnfa {
202            initial: 0,
203            transitions: set![
204                (0, Segment::empty(), 2),
205                (0, Segment::empty(), 1),
206                (2, Segment::singleton(u32::from('A')), 3),
207                (3, Segment::empty(), 1),
208                (3, Segment::empty(), 2)
209            ],
210            finals: set![1]
211        };
212        let actual = Enfa::from(&Re::new(r"A*")?);
213        assert_enfa_eq(expected, actual);
214        Ok(())
215    }
216
217    #[test]
218    fn test_dfa_epsilon() -> Result<()> {
219        let expected = ExpectedDfa {
220            initial: set![0, 1],
221            transitions: set![],
222            finals: set![set![0, 1]]
223        };
224        let actual = Dfa::from(&Re::new(r"[]")?);
225        assert_dfa_eq(expected, actual);
226        Ok(())
227    }
228
229    #[test]
230    fn test_dfa_symbol() -> Result<()> {
231        let expected = ExpectedDfa {
232            initial: set![0],
233            transitions: set![
234                (set![0], Segment::singleton(u32::from('A')), set![1])
235            ],
236            finals: set![set![1]]
237        };
238        let actual = Dfa::from(&Re::new(r"A")?);
239        assert_dfa_eq(expected, actual);
240        Ok(())
241    }
242
243    #[test]
244    fn test_dfa_negated_symbol() -> Result<()> {
245        let expected = ExpectedDfa {
246            initial: set![0],
247            transitions: set![
248                (set![0], Segment::less_than(u32::from('A')), set![1]),
249                (set![0], Segment::greater_than(u32::from('A')), set![1])
250            ],
251            finals: set![set![1]]
252        };
253        let actual = Dfa::from(&Re::new(r"[^A]")?);
254        assert_dfa_eq(expected, actual);
255        Ok(())
256    }
257
258    #[test]
259    fn test_dfa_symbol_set() -> Result<()> {
260        let expected = ExpectedDfa {
261            initial: set![0],
262            transitions: set![
263                (set![0], Segment::closed(u32::from('A'), u32::from('Z')), set![1]),
264                (set![0], Segment::closed(u32::from('a'), u32::from('z')), set![1])
265            ],
266            finals: set![set![1]]
267        };
268        let actual = Dfa::from(&Re::new(r"[A-Za-z]")?);
269        assert_dfa_eq(expected, actual);
270        Ok(())
271    }
272
273    #[test]
274    fn test_dfa_negated_symbol_set() -> Result<()> {
275        let expected = ExpectedDfa {
276            initial: set![0],
277            transitions: set![
278                (set![0], Segment::less_than(u32::from('A')), set![1]),
279                (set![0], Segment::open(u32::from('Z'), u32::from('a')), set![1]),
280                (set![0], Segment::greater_than(u32::from('z')), set![1])
281            ],
282            finals: set![set![1]]
283        };
284        let actual = Dfa::from(&Re::new(r"[^A-Za-z]")?);
285        assert_dfa_eq(expected, actual);
286        Ok(())
287    }
288
289    #[test]
290    fn test_dfa_alternation() -> Result<()> {
291        let expected = ExpectedDfa {
292            initial: set![0, 2, 4],
293            transitions: set![
294                (set![0, 2, 4], Segment::singleton(u32::from('A')), set![1, 3]),
295                (set![0, 2, 4], Segment::singleton(u32::from('B')), set![1, 5])
296            ],
297            finals: set![set![1, 3], set![1, 5]]
298        };
299        let actual = Dfa::from(&Re::new(r"A|B")?);
300        assert_dfa_eq(expected, actual);
301        Ok(())
302    }
303
304    #[test]
305    fn test_dfa_concatenation() -> Result<()> {
306        let expected = ExpectedDfa {
307            initial: set![0, 2],
308            transitions: set![
309                (set![0, 2], Segment::singleton(u32::from('A')), set![3, 4]),
310                (set![3, 4], Segment::singleton(u32::from('B')), set![1, 5])
311            ],
312            finals: set![set![1, 5]]
313        };
314        let actual = Dfa::from(&Re::new(r"AB")?);
315        assert_dfa_eq(expected, actual);
316        Ok(())
317    }
318
319    #[test]
320    fn test_dfa_repetition() -> Result<()> {
321        let expected = ExpectedDfa {
322            initial: set![0, 1, 2],
323            transitions: set![
324                (set![0, 1, 2], Segment::singleton(u32::from('A')), set![1, 2, 3]),
325                (set![1, 2, 3], Segment::singleton(u32::from('A')), set![1, 2, 3])
326            ],
327            finals: set![set![0, 1, 2], set![1, 2, 3]]
328        };
329        let actual = Dfa::from(&Re::new(r"A*")?);
330        assert_dfa_eq(expected, actual);
331        Ok(())
332    }
333
334    #[test]
335    fn test_match_epsilon_1() -> Result<()> {
336        let re = Re::new(r"[]")?;
337        assert!(re.is_match(""));
338        Ok(())
339    }
340
341    #[test]
342    fn test_match_epsilon_2() -> Result<()> {
343        let re = Re::new(r"[]")?;
344        assert!(!re.is_match("A"));
345        Ok(())
346    }
347
348    #[test]
349    fn test_match_symbol_1() -> Result<()> {
350        let re = Re::new(r"A")?;
351        assert!(re.is_match("A"));
352        Ok(())
353    }
354
355    #[test]
356    fn test_match_symbol_2() -> Result<()> {
357        let re = Re::new(r"A")?;
358        assert!(!re.is_match("B"));
359        Ok(())
360    }
361
362    #[test]
363    fn test_match_negated_symbol_1() -> Result<()> {
364        let re = Re::new(r"[^A]")?;
365        assert!(re.is_match("B"));
366        Ok(())
367    }
368
369    #[test]
370    fn test_match_negated_symbol_2() -> Result<()> {
371        let re = Re::new(r"[^A]")?;
372        assert!(!re.is_match("A"));
373        Ok(())
374    }
375
376    #[test]
377    fn test_match_symbol_set_1() -> Result<()> {
378        let re = Re::new(r"[A-Za-z]")?;
379        assert!(re.is_match("D"));
380        Ok(())
381    }
382
383    #[test]
384    fn test_match_symbol_set_2() -> Result<()> {
385        let re = Re::new(r"[A-Za-z]")?;
386        assert!(!re.is_match("_"));
387        Ok(())
388    }
389
390    #[test]
391    fn test_match_negated_symbol_set_1() -> Result<()> {
392        let re = Re::new(r"[^A-Za-z]")?;
393        assert!(re.is_match("_"));
394        Ok(())
395    }
396
397    #[test]
398    fn test_match_negated_symbol_set_2() -> Result<()> {
399        let re = Re::new(r"[^A-Za-z]")?;
400        assert!(!re.is_match("D"));
401        Ok(())
402    }
403
404    #[test]
405    fn test_match_alternation_1() -> Result<()> {
406        let re = Re::new(r"A|B")?;
407        assert!(re.is_match("A"));
408        Ok(())
409    }
410
411    #[test]
412    fn test_match_alternation_2() -> Result<()> {
413        let re = Re::new(r"A|B")?;
414        assert!(re.is_match("B"));
415        Ok(())
416    }
417
418    #[test]
419    fn test_match_alternation_3() -> Result<()> {
420        let re = Re::new(r"A|B")?;
421        assert!(!re.is_match("C"));
422        Ok(())
423    }
424
425    #[test]
426    fn test_match_concatenation_1() -> Result<()> {
427        let re = Re::new(r"AB")?;
428        assert!(re.is_match("AB"));
429        Ok(())
430    }
431
432    #[test]
433    fn test_match_concatenation_2() -> Result<()> {
434        let re = Re::new(r"AB")?;
435        assert!(!re.is_match("AA"));
436        Ok(())
437    }
438
439    #[test]
440    fn test_match_repetition_1() -> Result<()> {
441        let re = Re::new(r"A*")?;
442        assert!(re.is_match("AAAAAAAAA"));
443        Ok(())
444    }
445
446    #[test]
447    fn test_match_repetition_2() -> Result<()> {
448        let re = Re::new(r"A*")?;
449        assert!(!re.is_match("AAAABAAAA"));
450        Ok(())
451    }
452
453    #[test]
454    fn test_match_repetition_3() -> Result<()> {
455        let re = Re::new(r"A*")?;
456        assert!(re.is_match(""));
457        Ok(())
458    }
459
460    #[test]
461    fn test_match_repetition_4() -> Result<()> {
462        let re = Re::new(r"A+")?;
463        assert!(re.is_match("AAAAAAAAAA"));
464        Ok(())
465    }
466
467    #[test]
468    fn test_match_repetition_5() -> Result<()> {
469        let re = Re::new(r"A+")?;
470        assert!(!re.is_match("AAAABAAAAA"));
471        Ok(())
472    }
473
474    #[test]
475    fn test_match_repetition_6() -> Result<()> {
476        let re = Re::new(r"A+")?;
477        assert!(!re.is_match(""));
478        Ok(())
479    }
480
481    #[test]
482    fn test_match_repetition_7() -> Result<()> {
483        let re = Re::new(r"A?")?;
484        assert!(re.is_match("A"));
485        Ok(())
486    }
487
488    #[test]
489    fn test_match_repetition_8() -> Result<()> {
490        let re = Re::new(r"A?")?;
491        assert!(!re.is_match("B"));
492        Ok(())
493    }
494
495    #[test]
496    fn test_match_repetition_9() -> Result<()> {
497        let re = Re::new(r"A?")?;
498        assert!(re.is_match(""));
499        Ok(())
500    }
501
502    #[test]
503    fn test_match_repetition_10() -> Result<()> {
504        let re = Re::new(r"A{,3}")?;
505        assert!(re.is_match(""));
506        Ok(())
507    }
508
509    #[test]
510    fn test_match_repetition_11() -> Result<()> {
511        let re = Re::new(r"A{,3}")?;
512        assert!(re.is_match("AA"));
513        Ok(())
514    }
515
516    #[test]
517    fn test_match_repetition_12() -> Result<()> {
518        let re = Re::new(r"A{,3}")?;
519        assert!(re.is_match("AAA"));
520        Ok(())
521    }
522
523    #[test]
524    fn test_match_repetition_13() -> Result<()> {
525        let re = Re::new(r"A{,3}")?;
526        assert!(!re.is_match("AAAA"));
527        Ok(())
528    }
529
530    #[test]
531    fn test_match_repetition_14() -> Result<()> {
532        let re = Re::new(r"A{3,}")?;
533        assert!(!re.is_match(""));
534        Ok(())
535    }
536
537    #[test]
538    fn test_match_repetition_15() -> Result<()> {
539        let re = Re::new(r"A{3,}")?;
540        assert!(!re.is_match("AA"));
541        Ok(())
542    }
543
544    #[test]
545    fn test_match_repetition_16() -> Result<()> {
546        let re = Re::new(r"A{3,}")?;
547        assert!(re.is_match("AAA"));
548        Ok(())
549    }
550
551    #[test]
552    fn test_match_repetition_17() -> Result<()> {
553        let re = Re::new(r"A{3,}")?;
554        assert!(re.is_match("AAAA"));
555        Ok(())
556    }
557
558    #[test]
559    fn test_match_repetition_18() -> Result<()> {
560        let re = Re::new(r"A{3}")?;
561        assert!(!re.is_match(""));
562        Ok(())
563    }
564
565    #[test]
566    fn test_match_repetition_19() -> Result<()> {
567        let re = Re::new(r"A{3}")?;
568        assert!(!re.is_match("AA"));
569        Ok(())
570    }
571
572    #[test]
573    fn test_match_repetition_20() -> Result<()> {
574        let re = Re::new(r"A{3}")?;
575        assert!(re.is_match("AAA"));
576        Ok(())
577    }
578
579    #[test]
580    fn test_match_repetition_21() -> Result<()> {
581        let re = Re::new(r"A{3}")?;
582        assert!(!re.is_match("AAAA"));
583        Ok(())
584    }
585
586    #[test]
587    fn test_match_repetition_22() -> Result<()> {
588        let re = Re::new(r"A{2,3}")?;
589        assert!(!re.is_match(""));
590        Ok(())
591    }
592
593    #[test]
594    fn test_match_repetition_23() -> Result<()> {
595        let re = Re::new(r"A{2,3}")?;
596        assert!(re.is_match("AA"));
597        Ok(())
598    }
599
600    #[test]
601    fn test_match_repetition_24() -> Result<()> {
602        let re = Re::new(r"A{2,3}")?;
603        assert!(re.is_match("AAA"));
604        Ok(())
605    }
606
607    #[test]
608    fn test_match_repetition_25() -> Result<()> {
609        let re = Re::new(r"A{2,3}")?;
610        assert!(!re.is_match("AAAA"));
611        Ok(())
612    }
613
614    #[test]
615    fn test_match_complex_1() -> Result<()> {
616        let re = Re::new(r"(A|B)*(AAA|BBB)(A|B)*")?;
617        assert!(re.is_match("ABBBA"));
618        Ok(())
619    }
620
621    #[test]
622    fn test_match_complex_2() -> Result<()> {
623        let re = Re::new(r"(A|B)*(AAA|BBB)(A|B)*")?;
624        assert!(!re.is_match("ABBA"));
625        Ok(())
626    }
627
628    #[test]
629    fn test_match_complex_3() -> Result<()> {
630        let re = Re::new(r"(A|B)*(AAA|BBB)(A|B)*")?;
631        assert!(re.is_match("ABBAAA"));
632        Ok(())
633    }
634
635    #[test]
636    fn test_match_all() -> Result<()> {
637        let re = Re::new(r".")?;
638        assert!(re.is_match("A"));
639        Ok(())
640    }
641
642    struct ExpectedEnfa<S, T> {
643        initial: S,
644        transitions: Set<(S, Segment<T>, S)>,
645        finals: Set<S>,
646    }
647
648    fn assert_enfa_eq<S: Clone + Debug + Ord, T: Clone + Debug + Ord>(expected: ExpectedEnfa<S, T>, actual: Enfa<S, T>) {
649        assert_eq!(expected.initial, actual.states_index(actual.initial_index()).clone());
650        assert_eq!(expected.transitions, actual.transitions_slice(actual.transition_indices()).map(|(source, transition, target)| (actual.states_index(source).clone(), transition.clone(), actual.states_index(target).clone())).collect());
651        assert_eq!(expected.finals, actual.states_slice(actual.final_indices()).cloned().collect());
652    }
653
654    struct ExpectedDfa<S, T> {
655        initial: S,
656        transitions: Set<(S, Segment<T>, S)>,
657        finals: Set<S>,
658    }
659
660    fn assert_dfa_eq<S: Clone + Debug + Ord, T: Clone + Debug + Ord>(expected: ExpectedDfa<Set<S>, T>, actual: Dfa<Set<S>, T>) {
661        assert_eq!(expected.initial, actual.states_index(actual.initial_index()).clone());
662        assert_eq!(expected.transitions, actual.transitions_slice(actual.transition_indices()).map(|(source, transition, target)| (actual.states_index(source).clone(), transition.clone(), actual.states_index(target).clone())).collect());
663        assert_eq!(expected.finals, actual.states_slice(actual.final_indices()).cloned().collect());
664    }
665}