ezregexp/
parser.rs

1//! Parse a regular expression into a pattern
2
3use crate::builder::Pattern;
4use regex_syntax::ast::{
5    parse::Parser, Alternation, Assertion, AssertionKind, Ast, Class, ClassPerl, ClassPerlKind,
6    ClassUnicode, ClassUnicodeKind, Concat, Error, Group, GroupKind, Literal, Repetition, RepetitionKind,
7    RepetitionOp, RepetitionRange,
8};
9
10/// Explain a regex: turn it into a pattern
11pub fn explain(regex: &str) -> Result<Pattern, Error> {
12    let mut p = Parser::new();
13    p.parse(regex).and_then(|a| {
14        //println!("ast: {:?}", a);
15        do_explain(&a)
16    })
17}
18
19/// Do the explaining
20fn do_explain(ast: &Ast) -> Result<Pattern, Error> {
21    match ast {
22        Ast::Concat(Concat { asts, .. }) => Ok(simplify(
23            asts.iter()
24                .map(|a| do_explain(a))
25                .collect::<Result<Vec<Pattern>, Error>>()?,
26        )),
27        Ast::Literal(Literal { c, .. }) => Ok(Pattern::Text(format!("{}", c))),
28        Ast::Alternation(Alternation { asts, .. }) => Ok(Pattern::Or(
29            asts.iter()
30                .map(|a| do_explain(a))
31                .collect::<Result<Vec<Pattern>, Error>>()?,
32        )),
33        Ast::Group(Group { ast, kind:GroupKind::CaptureName(n),.. }) => do_explain(ast).map(|p| Pattern::Named{exp:Box::new(p),name:n.name.clone()}),
34        Ast::Group(Group { ast, .. }) => do_explain(ast),
35        Ast::Repetition(Repetition { ast, op, .. }) => {
36            let bds = bounds(op);
37            Ok(Pattern::Many {
38                exp: Box::new(do_explain(ast)?),
39                low: bds.0,
40                high: bds.1,
41            })
42        }
43        Ast::Class(Class::Perl(ClassPerl {
44            kind: ClassPerlKind::Digit,
45            negated:false,
46            ..
47        })) => Ok(Pattern::Digit),
48        Ast::Class(Class::Perl(ClassPerl {
49            kind: ClassPerlKind::Digit,
50            negated:true,
51            ..
52        })) => Ok(Pattern::Not(Box::new(Pattern::Digit))),
53        Ast::Class(Class::Perl(ClassPerl {
54            kind: ClassPerlKind::Word,
55            negated:false,
56            ..
57        })) => Ok(Pattern::WordCharacter),
58        Ast::Class(Class::Perl(ClassPerl {
59            kind: ClassPerlKind::Word,
60            negated:true,
61            ..
62        })) => Ok(Pattern::Not(Box::new(Pattern::WordCharacter))),
63        Ast::Assertion(Assertion {
64            kind: AssertionKind::StartLine,
65            ..
66        }) => Ok(Pattern::InputStart),
67        Ast::Assertion(Assertion {
68            kind: AssertionKind::EndLine,
69            ..
70        }) => Ok(Pattern::InputEnd),
71        Ast::Class(Class::Unicode(ClassUnicode {
72            kind: ClassUnicodeKind::OneLetter(c),
73            negated: false,
74            ..
75        })) if *c == 'N' => Ok(Pattern::Letter),
76        Ast::Class(Class::Unicode(ClassUnicode {
77            kind: ClassUnicodeKind::OneLetter(c),
78            negated: true,
79            ..
80        })) if *c == 'N' => Ok(Pattern::Not(Box::new(Pattern::Letter))),
81        Ast::Dot{..} => Ok(Pattern::Any),
82        _ => Ok(Pattern::Raw(String::new())),
83    }
84}
85
86/// Extract bound from a RepetitionOp
87fn bounds(op: &RepetitionOp) -> (u32, u32) {
88    match &op.kind {
89        RepetitionKind::ZeroOrOne => (0, 1),
90        RepetitionKind::ZeroOrMore => (0, 0),
91        RepetitionKind::OneOrMore => (1, 0),
92        RepetitionKind::Range(r) => match r {
93            RepetitionRange::AtLeast(m) => (*m, 0),
94            RepetitionRange::Exactly(m) => (*m, *m),
95            RepetitionRange::Bounded(l, h) => (*l, *h),
96        },
97    }
98}
99
100/// Simplify a list of patterns
101fn simplify(exps: Vec<Pattern>) -> Pattern {
102    let mut nexps = vec![];
103    for p in exps.into_iter() {
104        if let Pattern::Text(t) = p {
105            let op0 = nexps.pop();
106            if let Some(Pattern::Text(mut t0)) = op0 {
107                t0.push_str(&t);
108                nexps.push(Pattern::Text(t0));
109            } else {
110                if let Some(p0) = op0 {
111                    nexps.push(p0);
112                }
113                nexps.push(Pattern::Text(t));
114            }
115        } else {
116            if matches!(&p, Pattern::Raw(s) if s.is_empty()){
117                // ignore
118            } else {
119                nexps.push(p);
120            }
121            
122        }
123    }
124    if nexps.len() == 1 {
125        nexps.pop().unwrap()
126    } else {
127        Pattern::Sequence(nexps)
128    }
129}
130
131/*
132struct ExplainState {
133    stack:Vec<State>,
134}
135
136enum State {
137    Root,
138    String,
139    Expression,
140}
141
142impl Default for ExplainState {
143    fn default() -> Self {
144        ExplainState{stack:vec![State::Root]}
145    }
146
147
148}
149
150impl ExplainState {
151    fn string(&mut self) -> bool {
152        if let Some(State::String) = self.stack.last(){
153            return false;
154        } else {
155            self.stack.push(State::String);
156        }
157        true
158    }
159
160    fn unstring(&mut self) -> bool {
161        if let Some(State::String) = self.stack.last(){
162            self.stack.pop();
163            return true;
164        }
165        false
166    }
167
168    fn is_root(&self) -> bool {
169        if let Some(State::Root) = self.stack.last(){
170            return true;
171        }
172        false
173    }
174}
175*/
176
#[cfg(test)]
mod tests {
    use super::*;
    use crate::builder::ToCode;

    /// Date regex written in verbose mode, with whitespace and comments.
    const VERBOSE_DATE: &str = r#"(?x)
    (?P<y>\d{4})  # the year
    -
    (?P<m>\d{2}) # the month
    -
    (?P<d>\d{2})   # the day
    "#;

    /// Each case pairs the expected builder code with the regex to explain.
    #[test]
    fn test_basic_explain() {
        let cases: &[(&str, &str)] = &[
            (r#"text("Handel")"#, "Handel"),
            (r#"word_character()"#, r"\w"),
            (r#"letter()"#, r"\pN"),
            (r#"either(("gray", "grey"))"#, "gray|grey"),
            (
                r#"start_with("gr").and_either(("a", "e")).and_then("y")"#,
                "gr(a|e)y",
            ),
            (
                r#"start_with("colo").and_maybe("u").and_then("r")"#,
                "colou?r",
            ),
            (r#"digit().many(2, 3)"#, r#"\d{2,3}"#),
            (
                r#"at_start().and_then(digit()).times(4).and_then("-").and_then(digit()).times(2).and_then("-").and_then(digit()).times(2).must_end()"#,
                r"^\d{4}-\d{2}-\d{2}$",
            ),
            (
                r#"any_except(digit()).and_then(any_except(letter())).and_then(any_except(word_character()))"#,
                r#"\D\PN\W"#,
            ),
            (
                r#"start_with(digit().times(4).named("y")).and_then("-").and_then(digit().times(2).named("m")).and_then("-").and_then(digit().times(2).named("d"))"#,
                VERBOSE_DATE,
            ),
        ];
        for &(expected, regex) in cases {
            assert_explain(expected, regex);
        }
    }

    /// Assert that explaining `regex` succeeds and renders as the `expected` code.
    fn assert_explain(expected: &str, regex: &str) {
        let actual = explain(regex).map(|pattern| pattern.to_code());
        assert_eq!(Ok(expected.to_owned()), actual);
    }
}