rusty_peg/
util.rs

1use super::{Cache, Error, Input, Symbol, ParseResult};
2use regex::Regex;
3use std::collections::HashSet;
4
5// used by macro expansion
6pub use std::marker::PhantomData;
7pub use std::collections::HashMap;
8pub use std::rc::Rc;
9
10// ID :=
11//     "[a-zA-Z]+"
12// FOO :=
13//     ( "class" ID "{" {MEMBER} "}" )
14// MEMBER :=
15//     ( FIELD  ) |
16//     ( METHOD )
17// FIELD :=
18//     ( ID ":" TYPE )
19// METHOD :=
20//     ("void" ID "{")
21// NONTERMINAL := A | B | C ;
22// NONTERMINAL == <expr> ;
23// ()         ==> grouping
24// []         ==> optional
25// {}         ==> zero or more
26// A B        ==> whitespace separated
27// identifier ==> non-terminal
28// "xyz"      ==> regular expression
29// we build a parse tree that keeps each non-terminal
30
31#[derive(Debug)]
32pub struct Or<NT1,P2> {
33    pub a: NT1,
34    pub b: P2,
35}
36
37impl<'input,NT1,P2,R,G> Symbol<'input,G> for Or<NT1,P2>
38    where NT1: Symbol<'input,G,Output=R>, P2: Symbol<'input,G,Output=R>
39{
40    type Output = R;
41
42    fn pretty_print(&self) -> String {
43        format!("({} | {})", self.a.pretty_print(), self.b.pretty_print())
44    }
45
46    fn parse(&self, grammar: &mut G, start: Input<'input>)
47                 -> ParseResult<'input,R>
48    {
49        match self.a.parse(grammar, start) {
50            Ok(success) => { return Ok(success); }
51            Err(_) => { }
52        }
53
54        self.b.parse(grammar, start)
55    }
56}
57
58#[derive(Debug)]
59pub struct Join<NT1,P2> {
60    pub first: NT1,
61    pub second: P2,
62}
63
64impl<'input,NT1,P2,G> Symbol<'input,G> for Join<NT1,P2>
65    where NT1: Symbol<'input,G>, P2: Symbol<'input,G>
66{
67    type Output = (NT1::Output, P2::Output);
68
69    fn pretty_print(&self) -> String {
70        format!("{} {}", self.first.pretty_print(), self.second.pretty_print())
71    }
72
73    fn parse(&self, grammar: &mut G, start: Input<'input>)
74                 -> ParseResult<'input,(NT1::Output,P2::Output)>
75    {
76        let (mid, first) = try!(self.first.parse(grammar, start));
77        let (sep, ()) = try!(Whitespace.parse(grammar, mid));
78        let (end, second) = try!(self.second.parse(grammar, sep));
79        Ok((end, (first, second)))
80    }
81}
82
83#[derive(Debug)]
84pub struct Empty;
85
86impl<'input,G> Symbol<'input,G> for Empty {
87    type Output = ();
88
89    fn pretty_print(&self) -> String {
90        format!("()")
91    }
92
93    fn parse(&self, _: &mut G, start: Input<'input>)
94                 -> ParseResult<'input,()>
95    {
96        Ok((start, ()))
97    }
98}
99
100#[derive(Debug)]
101pub struct Whitespace;
102
103impl<'input,G> Symbol<'input,G> for Whitespace {
104    type Output = ();
105
106    fn pretty_print(&self) -> String {
107        format!("Whitespace")
108    }
109
110    fn parse(&self, _: &mut G, start: Input<'input>)
111             -> ParseResult<'input,()>
112    {
113        Ok((skip_whitespace(start), ()))
114    }
115}
116
117pub fn skip_whitespace<'input>(mut input: Input<'input>) -> Input<'input> {
118    let bytes = input.text.as_bytes();
119    while input.offset < input.text.len() && is_space(bytes[input.offset]) {
120        input.offset += 1;
121    }
122    return input;
123
124    fn is_space(c: u8) -> bool {
125        match c as char {
126            ' ' => true,
127            '\n' => true,
128            _ => false,
129        }
130    }
131}
132
133impl<'input,G> Symbol<'input,G> for &'static str {
134    type Output = &'static str;
135
136    fn pretty_print(&self) -> String {
137        format!("{:?}", self)
138    }
139
140    fn parse(&self, _: &mut G, start: Input<'input>) -> ParseResult<'input,&'static str> {
141        let text = *self;
142        if start.text[start.offset..].starts_with(text) {
143            let end = start.offset_by(text.len());
144            Ok((end, text))
145        } else {
146            Err(Error { expected: text, offset: start.offset })
147        }
148    }
149}
150
151#[derive(Debug)]
152pub struct Optional<P> {
153    parser: P
154}
155
156impl<'input,G,P> Symbol<'input,G> for Optional<P>
157    where P: Symbol<'input,G>
158{
159    type Output = Option<P::Output>;
160
161    fn pretty_print(&self) -> String {
162        format!("[{}]", self.parser.pretty_print())
163    }
164
165    fn parse(&self, grammar: &mut G, start: Input<'input>)
166                 -> ParseResult<'input,Option<P::Output>>
167    {
168        match self.parser.parse(grammar, start) {
169            Ok((end, result)) => Ok((end, Some(result))),
170            Err(_) => Ok((start, None))
171        }
172    }
173}
174
175#[derive(Debug)]
176pub struct Repeat<P,S> {
177    pub parser: P,
178    pub separator: S,
179    pub min: usize,
180}
181
182impl<'input,G,P,S> Symbol<'input,G> for Repeat<P,S>
183    where P: Symbol<'input,G>, S: Symbol<'input,G>
184{
185    type Output = Vec<P::Output>;
186
187    fn pretty_print(&self) -> String {
188        match self.min {
189            0 => format!("{{{}}}", self.parser.pretty_print()),
190            1 => format!("{{+ {}}}", self.parser.pretty_print()),
191            _ => format!("{{#{} {}}}", self.min, self.parser.pretty_print()),
192        }
193    }
194
195    fn parse(&self, grammar: &mut G, start: Input<'input>)
196             -> ParseResult<'input,Vec<P::Output>>
197    {
198        let mut mid = start;
199        let mut children = vec![];
200        let mut err;
201        loop {
202            match self.parser.parse(grammar, mid) {
203                Ok((end, result)) => {
204                    children.push(result);
205
206                    match self.separator.parse(grammar, end) {
207                        Ok((sep_end, _)) => {
208                            mid = sep_end;
209                        }
210
211                        Err(e) => {
212                            err = e;
213                            break;
214                        }
215                    }
216                }
217                Err(e) => {
218                    err = e;
219                    break;
220                }
221            }
222        }
223        if children.len() >= self.min {
224            return Ok((mid, children));
225        } else {
226            return Err(err);
227        }
228    }
229}
230
231#[derive(Debug)]
232pub struct RegexNt {
233    regex: Regex,
234    exceptions: HashSet<String>,
235}
236
237impl RegexNt {
238    pub fn new(text: &str, exceptions: HashSet<String>) -> RegexNt {
239        // we always want the regex anchored to the start of the string
240        let text = format!("^{}", text);
241        RegexNt {regex: Regex::new(&text).unwrap(), exceptions: exceptions}
242    }
243}
244
245impl<'input,G> Symbol<'input,G> for RegexNt {
246    type Output = &'input str;
247
248    fn pretty_print(&self) -> String {
249        format!("{:?}", self)
250    }
251
252    fn parse(&self,
253             _: &mut G,
254             start: ::Input<'input>)
255             -> ::ParseResult<'input,&'input str>
256    {
257        match self.regex.find(&start.text[start.offset..]) {
258            Some((_, offset)) => {
259                let end = start.offset_by(offset);
260                let matched = &start.text[start.offset..end.offset];
261                if !self.exceptions.contains(matched) {
262                    return Ok((end, matched));
263                }
264            }
265            None => { }
266        }
267
268        Err(::Error { expected: "regex", offset: start.offset })
269    }
270}
271
272pub fn memoize<'input,P,T:Clone,ComputeFn,CacheFn>(
273    parser: &mut P,
274    mut cache_fn: CacheFn,
275    offset: usize,
276    compute_fn: ComputeFn)
277    -> ParseResult<'input,T>
278    where
279    CacheFn: FnMut(&mut P) -> &mut Cache<'input,T>,
280    ComputeFn: FnOnce(&mut P) -> ParseResult<'input,T>,
281{
282    {
283        let cache = cache_fn(parser);
284        match cache.get(&offset) {
285            Some(p) => { return p.clone(); }
286            None => { }
287        }
288    }
289
290    let result = compute_fn(parser);
291
292    let cache = cache_fn(parser);
293    cache.insert(offset, result.clone());
294
295    result
296}