1extern crate cfg;
2extern crate regex_syntax;
3
4pub use regex_syntax::hir::{
5 Hir,
6 HirKind,
7 Literal,
8 Class,
9 ClassUnicode,
10 ClassBytes,
11 ClassUnicodeRange,
12 ClassBytesRange,
13 RepetitionKind,
14 RepetitionRange,
15};
16
17use std::iter;
18
19use cfg::{Cfg, Symbol, ContextFree};
20use cfg::history::RewriteSequence;
21use regex_syntax::{Result, Parser};
22
23struct ClassMap {
24 map: Vec<(Class, Symbol)>,
25}
26
27impl ClassMap {
28 fn get(&self, key: &Class) -> Option<Symbol> {
29 for &(ref class, sym) in &self.map {
30 if class == key {
31 return Some(sym);
32 }
33 }
34 None
35 }
36
37 fn insert(&mut self, key: Class, value: Symbol) -> Option<Symbol> {
38 for &mut (ref class, ref mut sym) in &mut self.map {
39 if class == &key {
40 let old_value = *sym;
41 *sym = value;
42 return Some(old_value);
43 }
44 }
45 self.map.push((key, value));
46 None
47 }
48
49 fn contains_class(&self, key: &Class) -> bool {
50 for &(ref class, _) in &self.map {
51 if class == key {
52 return true;
53 }
54 }
55 false
56 }
57
58 fn get_or_insert_with<F>(&mut self, key: Class, f: F) -> Symbol
59 where F: FnOnce() -> Symbol,
60 {
61 for &(ref class, sym) in &self.map {
62 if class == &key {
63 return sym;
64 }
65 }
66 let sym = f();
67 self.map.push((key, sym));
68 sym
69 }
70}
71
72pub struct RegexTranslation<'a, H> {
73 classes: ClassMap,
76 cfg: &'a mut Cfg<H>,
78}
79
80impl<'a, H> RegexTranslation<'a, H>
81 where H: Clone + Default + RewriteSequence<Rewritten = H>
82{
83 pub fn new(cfg: &'a mut Cfg<H>) -> RegexTranslation<'a, H> {
84 RegexTranslation {
85 classes: ClassMap {
86 map: vec![]
87 },
88 cfg,
89 }
90 }
91
92 pub fn change_cfg<'b, H2>(self, other_cfg: &'b mut Cfg<H2>) -> RegexTranslation<'b, H2>
93 where H2: Clone + Default + RewriteSequence<Rewritten = H>
94 {
95 RegexTranslation {
96 classes: self.classes,
97 cfg: other_cfg,
98 }
99 }
100
101 pub fn rewrite_string(&mut self, string: &str) -> Symbol {
102 let factors = string.chars().map(|ch| self.rewrite_char(ch)).collect::<Vec<_>>();
103 let lhs = self.cfg.sym();
104 self.cfg.rule(lhs).rhs_with_history(&factors[..], H::default());
105 lhs
106 }
107
108 pub fn rewrite_regex(&mut self, regex: &str) -> Result<Symbol> {
109 Parser::new().parse(regex).map(|hir| self.rewrite_hir(&hir))
110 }
111
112 fn rewrite_hir(&mut self, hir: &Hir) -> Symbol {
113 match hir.kind() {
114 &HirKind::Empty => {
115 self.empty()
116 }
117 &HirKind::Literal(Literal::Unicode(ch)) => {
118 self.rewrite_char(ch)
119 }
120 &HirKind::Literal(Literal::Byte(byte)) => {
121 self.rewrite_byte(byte)
122 }
123 &HirKind::Class(ref class) => {
124 self.rewrite_class(class)
125 }
126 &HirKind::Concat(ref factors) => {
127 let mut rhs = vec![];
128 for factor in factors {
129 let factor_sym = self.rewrite_hir(factor);
130 rhs.push(factor_sym);
131 }
132 let lhs = self.cfg.sym();
133 self.cfg.rule(lhs).rhs_with_history(&rhs[..], H::default());
134 lhs
135 }
136 &HirKind::Repetition(ref repetition) => {
138 assert!(repetition.greedy);
139 let (min, max) = match repetition.kind {
140 RepetitionKind::Range(RepetitionRange::Exactly(x)) => (x, Some(x)),
141 RepetitionKind::Range(RepetitionRange::AtLeast(x)) => (x, None),
142 RepetitionKind::Range(RepetitionRange::Bounded(x, y)) => (x, Some(y)),
143 RepetitionKind::ZeroOrOne => (0, Some(1)),
144 RepetitionKind::ZeroOrMore => (0, None),
145 RepetitionKind::OneOrMore => (1, None),
146 };
147 let lhs = self.cfg.sym();
148 let inner_sym = self.rewrite_hir(&*repetition.hir);
149 self.cfg.sequence(lhs).inclusive(min, max).rhs_with_history(inner_sym, H::default());
150 lhs
151 }
152 &HirKind::Group(ref group) => {
153 let lhs = self.cfg.sym();
154 let inner_sym = self.rewrite_hir(&*group.hir);
155 self.cfg.rule(lhs).rhs_with_history([inner_sym], H::default());
156 lhs
157 }
158 &HirKind::Alternation(ref summands) => {
159 let lhs = self.cfg.sym();
160 for summand in summands {
161 let summand_sym = self.rewrite_hir(summand);
162 self.cfg.rule(lhs).rhs_with_history([summand_sym], H::default());
163 }
164 lhs
165 }
166 &HirKind::Anchor(..) | &HirKind::WordBoundary(..) => {
167 panic!();
168 }
169 }
170 }
171
172 fn rewrite_char(&mut self, ch: char) -> Symbol {
173 let range = ClassUnicodeRange::new(ch, ch);
174 let class = ClassUnicode::new(iter::once(range));
175 let cfg = &mut self.cfg;
176 self.classes.get_or_insert_with(Class::Unicode(class), || {
177 cfg.sym()
178 })
179 }
180
181 fn rewrite_byte(&mut self, byte: u8) -> Symbol {
182 let range = ClassBytesRange::new(byte, byte);
183 let class = ClassBytes::new(iter::once(range));
184 let cfg = &mut self.cfg;
185 self.classes.get_or_insert_with(Class::Bytes(class), || {
186 cfg.sym()
187 })
188 }
189
190 fn rewrite_class(&mut self, class: &Class) -> Symbol {
191 match class {
192 &Class::Unicode(ref unicode) => {
193 if self.classes.contains_class(class) {
194 self.classes.get(class).unwrap()
195 } else {
196 let class_sym = self.cfg.sym();
197 for range in unicode.iter() {
198 let key = Class::Unicode(ClassUnicode::new(iter::once(range.clone())));
199 let cfg = &mut self.cfg;
200 let range_sym = self.classes.get_or_insert_with(key, || {
201 cfg.sym()
202 });
203 self.cfg.rule(class_sym).rhs_with_history([range_sym], H::default());
207 }
208 self.classes.insert(class.clone(), class_sym);
209 class_sym
210 }
211 }
212 &Class::Bytes(ref bytes) => {
213 if self.classes.contains_class(class) {
214 self.classes.get(class).unwrap()
215 } else {
216 let class_sym = self.cfg.sym();
217 for range in bytes.iter() {
218 let key = Class::Bytes(ClassBytes::new(iter::once(range.clone())));
219 let cfg = &mut self.cfg;
220 let range_sym = self.classes.get_or_insert_with(key, || {
221 cfg.sym()
222 });
223 self.cfg.rule(class_sym).rhs_with_history([range_sym], H::default());
227 }
228 self.classes.insert(class.clone(), class_sym);
229 class_sym
230 }
231 }
232 }
233 }
234
235 fn empty(&mut self) -> Symbol {
236 let empty_class_unicode = Class::Unicode(ClassUnicode::empty());
237 let empty_class_bytes = Class::Bytes(ClassBytes::empty());
238 let cfg = &mut self.cfg;
239 let value = self.classes.get_or_insert_with(empty_class_unicode, || {
240 let empty = cfg.sym();
241 cfg.rule(empty).rhs_with_history([], H::default());
242 empty
243 });
244 self.classes.insert(empty_class_bytes, value);
245 value
246 }
247
248 pub fn class_map(&self) -> &Vec<(Class, Symbol)> {
249 &self.classes.map
250 }
251}