cfg_regex/
lib.rs

1extern crate cfg;
2extern crate regex_syntax;
3
4pub use regex_syntax::hir::{
5    Hir,
6    HirKind,
7    Literal,
8    Class,
9    ClassUnicode,
10    ClassBytes,
11    ClassUnicodeRange,
12    ClassBytesRange,
13    RepetitionKind,
14    RepetitionRange,
15};
16
17use std::iter;
18
19use cfg::{Cfg, Symbol, ContextFree};
20use cfg::history::RewriteSequence;
21use regex_syntax::{Result, Parser};
22
23struct ClassMap {
24    map: Vec<(Class, Symbol)>,
25}
26
27impl ClassMap {
28    fn get(&self, key: &Class) -> Option<Symbol> {
29        for &(ref class, sym) in &self.map {
30            if class == key {
31                return Some(sym);
32            }
33        }
34        None
35    }
36
37    fn insert(&mut self, key: Class, value: Symbol) -> Option<Symbol> {
38        for &mut (ref class, ref mut sym) in &mut self.map {
39            if class == &key {
40                let old_value = *sym;
41                *sym = value;
42                return Some(old_value);
43            }
44        }
45        self.map.push((key, value));
46        None
47    }
48
49    fn contains_class(&self, key: &Class) -> bool {
50        for &(ref class, _) in &self.map {
51            if class == key {
52                return true;
53            }
54        }
55        false
56    }
57
58    fn get_or_insert_with<F>(&mut self, key: Class, f: F) -> Symbol
59        where F: FnOnce() -> Symbol,
60    {
61        for &(ref class, sym) in &self.map {
62            if class == &key {
63                return sym;
64            }
65        }
66        let sym = f();
67        self.map.push((key, sym));
68        sym
69    }
70}
71
72pub struct RegexTranslation<'a, H> {
73    // Every distinct class has a terminal or nonterminal symbol.
74    // Empty class [] points to a nulling symbol, if used.
75    classes: ClassMap,
76    // A grammar.
77    cfg: &'a mut Cfg<H>,
78}
79
80impl<'a, H> RegexTranslation<'a, H>
81    where H: Clone + Default + RewriteSequence<Rewritten = H>
82{
83    pub fn new(cfg: &'a mut Cfg<H>) -> RegexTranslation<'a, H> {
84        RegexTranslation {
85            classes: ClassMap {
86                map: vec![]
87            },
88            cfg,
89        }
90    }
91
92    pub fn change_cfg<'b, H2>(self, other_cfg: &'b mut Cfg<H2>) -> RegexTranslation<'b, H2>
93        where H2: Clone + Default + RewriteSequence<Rewritten = H>
94    {
95        RegexTranslation {
96            classes: self.classes,
97            cfg: other_cfg,
98        }
99    }
100    
101    pub fn rewrite_string(&mut self, string: &str) -> Symbol {
102        let factors = string.chars().map(|ch| self.rewrite_char(ch)).collect::<Vec<_>>();
103        let lhs = self.cfg.sym();
104        self.cfg.rule(lhs).rhs_with_history(&factors[..], H::default());
105        lhs
106    }
107
108    pub fn rewrite_regex(&mut self, regex: &str) -> Result<Symbol> {
109        Parser::new().parse(regex).map(|hir| self.rewrite_hir(&hir))
110    }
111
112    fn rewrite_hir(&mut self, hir: &Hir) -> Symbol {
113        match hir.kind() {
114            &HirKind::Empty => {
115                self.empty()
116            }
117            &HirKind::Literal(Literal::Unicode(ch)) => {
118                self.rewrite_char(ch)
119            }
120            &HirKind::Literal(Literal::Byte(byte)) => {
121                self.rewrite_byte(byte)
122            }
123            &HirKind::Class(ref class) => {
124                self.rewrite_class(class)
125            }
126            &HirKind::Concat(ref factors) => {
127                let mut rhs = vec![];
128                for factor in factors {
129                    let factor_sym = self.rewrite_hir(factor);
130                    rhs.push(factor_sym);
131                }
132                let lhs = self.cfg.sym();
133                self.cfg.rule(lhs).rhs_with_history(&rhs[..], H::default());
134                lhs
135            }
136            // Beware! This code was written after some strong alcohocil drinks.
137            &HirKind::Repetition(ref repetition) => {
138                assert!(repetition.greedy);
139                let (min, max) = match repetition.kind {
140                    RepetitionKind::Range(RepetitionRange::Exactly(x)) => (x, Some(x)),
141                    RepetitionKind::Range(RepetitionRange::AtLeast(x)) => (x, None),
142                    RepetitionKind::Range(RepetitionRange::Bounded(x, y)) => (x, Some(y)),
143                    RepetitionKind::ZeroOrOne => (0, Some(1)),
144                    RepetitionKind::ZeroOrMore => (0, None),
145                    RepetitionKind::OneOrMore => (1, None),
146                };
147                let lhs = self.cfg.sym();
148                let inner_sym = self.rewrite_hir(&*repetition.hir);
149                self.cfg.sequence(lhs).inclusive(min, max).rhs_with_history(inner_sym, H::default());
150                lhs
151            }
152            &HirKind::Group(ref group) => {
153                let lhs = self.cfg.sym();
154                let inner_sym = self.rewrite_hir(&*group.hir);
155                self.cfg.rule(lhs).rhs_with_history([inner_sym], H::default());
156                lhs
157            }
158            &HirKind::Alternation(ref summands) => {
159                let lhs = self.cfg.sym();
160                for summand in summands {
161                    let summand_sym = self.rewrite_hir(summand);
162                    self.cfg.rule(lhs).rhs_with_history([summand_sym], H::default());
163                }
164                lhs
165            }
166            &HirKind::Anchor(..) | &HirKind::WordBoundary(..) => {
167                panic!();
168            }
169        }
170    }
171
172    fn rewrite_char(&mut self, ch: char) -> Symbol {
173        let range = ClassUnicodeRange::new(ch, ch);
174        let class = ClassUnicode::new(iter::once(range));
175        let cfg = &mut self.cfg;
176        self.classes.get_or_insert_with(Class::Unicode(class), || {
177            cfg.sym()
178        })
179    }
180
181    fn rewrite_byte(&mut self, byte: u8) -> Symbol {
182        let range = ClassBytesRange::new(byte, byte);
183        let class = ClassBytes::new(iter::once(range));
184        let cfg = &mut self.cfg;
185        self.classes.get_or_insert_with(Class::Bytes(class), || {
186            cfg.sym()
187        })
188    }
189
190    fn rewrite_class(&mut self, class: &Class) -> Symbol {
191        match class {
192            &Class::Unicode(ref unicode) => {
193                if self.classes.contains_class(class) {
194                    self.classes.get(class).unwrap()
195                } else {
196                    let class_sym = self.cfg.sym();
197                    for range in unicode.iter() {
198                        let key = Class::Unicode(ClassUnicode::new(iter::once(range.clone())));
199                        let cfg = &mut self.cfg;
200                        let range_sym = self.classes.get_or_insert_with(key, || {
201                            cfg.sym()
202                        });
203                        // if unicode.ranges().len() > 1 {
204                        // else simplify
205                        // }
206                        self.cfg.rule(class_sym).rhs_with_history([range_sym], H::default());
207                    }
208                    self.classes.insert(class.clone(), class_sym);
209                    class_sym
210                }
211            }
212            &Class::Bytes(ref bytes) => {
213                if self.classes.contains_class(class) {
214                    self.classes.get(class).unwrap()
215                } else {
216                    let class_sym = self.cfg.sym();
217                    for range in bytes.iter() {
218                        let key = Class::Bytes(ClassBytes::new(iter::once(range.clone())));
219                        let cfg = &mut self.cfg;
220                        let range_sym = self.classes.get_or_insert_with(key, || {
221                            cfg.sym()
222                        });
223                        // if unicode.ranges().len() > 1 {
224                        // else simplify
225                        // }
226                        self.cfg.rule(class_sym).rhs_with_history([range_sym], H::default());
227                    }
228                    self.classes.insert(class.clone(), class_sym);
229                    class_sym
230                }
231            }
232        }
233    }
234
235    fn empty(&mut self) -> Symbol {
236        let empty_class_unicode = Class::Unicode(ClassUnicode::empty());
237        let empty_class_bytes = Class::Bytes(ClassBytes::empty());
238        let cfg = &mut self.cfg;
239        let value = self.classes.get_or_insert_with(empty_class_unicode, || {
240            let empty = cfg.sym();
241            cfg.rule(empty).rhs_with_history([], H::default());
242            empty
243        });
244        self.classes.insert(empty_class_bytes, value);
245        value
246    }
247
248    pub fn class_map(&self) -> &Vec<(Class, Symbol)> {
249        &self.classes.map
250    }
251}