1use crate::builder::Pattern;
4use regex_syntax::ast::{
5 parse::Parser, Alternation, Assertion, AssertionKind, Ast, Class, ClassPerl, ClassPerlKind,
6 ClassUnicode, ClassUnicodeKind, Concat, Error, Group, GroupKind, Literal, Repetition, RepetitionKind,
7 RepetitionOp, RepetitionRange,
8};
9
10pub fn explain(regex: &str) -> Result<Pattern, Error> {
12 let mut p = Parser::new();
13 p.parse(regex).and_then(|a| {
14 do_explain(&a)
16 })
17}
18
19fn do_explain(ast: &Ast) -> Result<Pattern, Error> {
21 match ast {
22 Ast::Concat(Concat { asts, .. }) => Ok(simplify(
23 asts.iter()
24 .map(|a| do_explain(a))
25 .collect::<Result<Vec<Pattern>, Error>>()?,
26 )),
27 Ast::Literal(Literal { c, .. }) => Ok(Pattern::Text(format!("{}", c))),
28 Ast::Alternation(Alternation { asts, .. }) => Ok(Pattern::Or(
29 asts.iter()
30 .map(|a| do_explain(a))
31 .collect::<Result<Vec<Pattern>, Error>>()?,
32 )),
33 Ast::Group(Group { ast, kind:GroupKind::CaptureName(n),.. }) => do_explain(ast).map(|p| Pattern::Named{exp:Box::new(p),name:n.name.clone()}),
34 Ast::Group(Group { ast, .. }) => do_explain(ast),
35 Ast::Repetition(Repetition { ast, op, .. }) => {
36 let bds = bounds(op);
37 Ok(Pattern::Many {
38 exp: Box::new(do_explain(ast)?),
39 low: bds.0,
40 high: bds.1,
41 })
42 }
43 Ast::Class(Class::Perl(ClassPerl {
44 kind: ClassPerlKind::Digit,
45 negated:false,
46 ..
47 })) => Ok(Pattern::Digit),
48 Ast::Class(Class::Perl(ClassPerl {
49 kind: ClassPerlKind::Digit,
50 negated:true,
51 ..
52 })) => Ok(Pattern::Not(Box::new(Pattern::Digit))),
53 Ast::Class(Class::Perl(ClassPerl {
54 kind: ClassPerlKind::Word,
55 negated:false,
56 ..
57 })) => Ok(Pattern::WordCharacter),
58 Ast::Class(Class::Perl(ClassPerl {
59 kind: ClassPerlKind::Word,
60 negated:true,
61 ..
62 })) => Ok(Pattern::Not(Box::new(Pattern::WordCharacter))),
63 Ast::Assertion(Assertion {
64 kind: AssertionKind::StartLine,
65 ..
66 }) => Ok(Pattern::InputStart),
67 Ast::Assertion(Assertion {
68 kind: AssertionKind::EndLine,
69 ..
70 }) => Ok(Pattern::InputEnd),
71 Ast::Class(Class::Unicode(ClassUnicode {
72 kind: ClassUnicodeKind::OneLetter(c),
73 negated: false,
74 ..
75 })) if *c == 'N' => Ok(Pattern::Letter),
76 Ast::Class(Class::Unicode(ClassUnicode {
77 kind: ClassUnicodeKind::OneLetter(c),
78 negated: true,
79 ..
80 })) if *c == 'N' => Ok(Pattern::Not(Box::new(Pattern::Letter))),
81 Ast::Dot{..} => Ok(Pattern::Any),
82 _ => Ok(Pattern::Raw(String::new())),
83 }
84}
85
86fn bounds(op: &RepetitionOp) -> (u32, u32) {
88 match &op.kind {
89 RepetitionKind::ZeroOrOne => (0, 1),
90 RepetitionKind::ZeroOrMore => (0, 0),
91 RepetitionKind::OneOrMore => (1, 0),
92 RepetitionKind::Range(r) => match r {
93 RepetitionRange::AtLeast(m) => (*m, 0),
94 RepetitionRange::Exactly(m) => (*m, *m),
95 RepetitionRange::Bounded(l, h) => (*l, *h),
96 },
97 }
98}
99
100fn simplify(exps: Vec<Pattern>) -> Pattern {
102 let mut nexps = vec![];
103 for p in exps.into_iter() {
104 if let Pattern::Text(t) = p {
105 let op0 = nexps.pop();
106 if let Some(Pattern::Text(mut t0)) = op0 {
107 t0.push_str(&t);
108 nexps.push(Pattern::Text(t0));
109 } else {
110 if let Some(p0) = op0 {
111 nexps.push(p0);
112 }
113 nexps.push(Pattern::Text(t));
114 }
115 } else {
116 if matches!(&p, Pattern::Raw(s) if s.is_empty()){
117 } else {
119 nexps.push(p);
120 }
121
122 }
123 }
124 if nexps.len() == 1 {
125 nexps.pop().unwrap()
126 } else {
127 Pattern::Sequence(nexps)
128 }
129}
130
#[cfg(test)]
mod tests {
    use super::*;
    use crate::builder::ToCode;

    /// Explains `regex` and checks that the generated code equals `expected`.
    fn assert_explain(expected: &str, regex: &str) {
        let actual = explain(regex).map(|p| p.to_code());
        assert_eq!(Ok(expected.to_owned()), actual);
    }

    #[test]
    fn test_basic_explain() {
        // Each entry is (expected generated code, regex to explain).
        let cases: &[(&str, &str)] = &[
            (r#"text("Handel")"#, "Handel"),
            (r#"word_character()"#, r"\w"),
            (r#"letter()"#, r"\pN"),
            (r#"either(("gray", "grey"))"#, "gray|grey"),
            (
                r#"start_with("gr").and_either(("a", "e")).and_then("y")"#,
                "gr(a|e)y",
            ),
            (
                r#"start_with("colo").and_maybe("u").and_then("r")"#,
                "colou?r",
            ),
            (r#"digit().many(2, 3)"#, r#"\d{2,3}"#),
            (
                r#"at_start().and_then(digit()).times(4).and_then("-").and_then(digit()).times(2).and_then("-").and_then(digit()).times(2).must_end()"#,
                r"^\d{4}-\d{2}-\d{2}$",
            ),
            (
                r#"any_except(digit()).and_then(any_except(letter())).and_then(any_except(word_character()))"#,
                r#"\D\PN\W"#,
            ),
            // Verbose-mode regex spanning several lines, with trailing `#` comments.
            (
                r#"start_with(digit().times(4).named("y")).and_then("-").and_then(digit().times(2).named("m")).and_then("-").and_then(digit().times(2).named("d"))"#,
                r#"(?x)
  (?P<y>\d{4}) # the year
  -
  (?P<m>\d{2}) # the month
  -
  (?P<d>\d{2}) # the day
"#,
            ),
        ];

        for &(expected, regex) in cases {
            assert_explain(expected, regex);
        }
    }
}