rex_regex/
repr.rs

1//! The repr module is concerned with the representation of parsed regular expressions. A Pattern
2//! is compiled by the `compile` module into a state graph defined in `state`.
3#![allow(dead_code)]
4
5/// A Pattern is either a repeated pattern, a stored submatch, an alternation between two patterns,
6/// two patterns following each other, or a character range or set.
7#[derive(Clone, Debug, PartialEq)]
8pub enum Pattern {
9    Concat(Vec<Pattern>),
10    /// A repeated sub-pattern.
11    Repeated(Box<Repetition>),
12    /// A stored submatch.
13    Submatch(Box<Pattern>),
14    /// An alternation between patterns (a|bb|ccc)
15    Alternate(Vec<Pattern>),
16    /// A single character.
17    Char(char),
18    /// Any character (.).
19    Any,
20    /// A string.
21    Str(String),
22    /// A character range.
23    CharRange(char, char),
24    /// A set of characters.
25    CharSet(Vec<char>),
26    /// A position anchor.
27    Anchor(AnchorLocation),
28}
29
/// `AnchorLocation` encodes the `^` and `$` anchors.
///
/// The type is a plain field-less enum, so it is `Copy` and `Eq` in addition to the derives
/// shared with `Pattern`; values can be passed around and compared without cloning.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum AnchorLocation {
    /// The `^` anchor.
    Begin,
    /// The `$` anchor.
    End,
}
36
37/// A pattern can be repeated in various manners, which is represented by the pattern being wrapped
38/// in a Repetition.
39///
40/// The inner type is a pattern, because a repetition is either concerned with only one pattern
41/// (`/.?/`), or a submatch (`/(abc)?/`).
42#[derive(Clone, Debug, PartialEq)]
43pub enum Repetition {
44    /// /P+/
45    ZeroOrOnce(Pattern),
46    /// /P*/
47    ZeroOrMore(Pattern),
48    /// /P+/
49    OnceOrMore(Pattern),
50    /// /P{min, (max)}/
51    Specific(Pattern, u32, Option<u32>),
52}
53
#[cfg(test)]
mod tests {
    use super::*;
    use crate::compile::start_compile;
    use crate::state::*;

    /// Builds the pattern tree for `/a(b|c)/`.
    fn simple_re0() -> Pattern {
        let head = Pattern::CharRange('a', 'a');
        let tail = Pattern::Alternate(vec![Pattern::Char('b'), Pattern::Char('c')]);
        Pattern::Concat(vec![head, tail])
    }

    /// Builds the (not yet compiled) pattern tree for
    /// `/(a[bc])?(cd)*(e|f)+x{1,3}(g|hh|i)j{2,}klm/`.
    fn simple_re1() -> Pattern {
        // Small constructors that hide the boxing noise.
        fn repeat(rep: Repetition) -> Pattern {
            Pattern::Repeated(Box::new(rep))
        }
        fn group(inner: Pattern) -> Pattern {
            Pattern::Submatch(Box::new(inner))
        }

        // (a[bc])?
        let a_bc = repeat(Repetition::ZeroOrOnce(group(Pattern::Concat(vec![
            Pattern::Char('a'),
            Pattern::CharRange('b', 'c'),
        ]))));
        // (cd)*
        let cd_star = repeat(Repetition::ZeroOrMore(group(Pattern::Concat(vec![
            Pattern::Char('c'),
            Pattern::Char('d'),
        ]))));
        // (e|f)+ -- unlike the two parts above, the submatch wraps the whole repetition here.
        let e_or_f_plus = group(repeat(Repetition::OnceOrMore(Pattern::Alternate(vec![
            Pattern::Char('e'),
            Pattern::Char('f'),
        ]))));
        // x{1,3}
        let x_1_to_3 = repeat(Repetition::Specific(Pattern::Char('x'), 1, Some(3)));
        // (g|hh|i), with `hh` expressed as h{2,2}
        let g_hh_i = Pattern::Alternate(vec![
            Pattern::Char('g'),
            repeat(Repetition::Specific(Pattern::Char('h'), 2, Some(2))),
            Pattern::Char('i'),
        ]);
        // j{2,}
        let j_2_plus = repeat(Repetition::Specific(Pattern::Char('j'), 2, None));
        // klm
        let klm = Pattern::Str("klm".to_string());

        Pattern::Concat(vec![
            a_bc,
            cd_star,
            e_or_f_plus,
            x_1_to_3,
            g_hh_i,
            j_2_plus,
            klm,
        ])
    }

    /// Compiles `simple_re1` and prints the resulting state graph wrapped in a `digraph` block.
    /// There are no assertions; the test only checks that compiling and rendering do not panic.
    #[test]
    fn test_re1() {
        let pattern = simple_re1();
        let graph = dot(&start_compile(&pattern));
        println!("digraph st {{ {} }}", graph);
    }
}