nameless_peg_parser/peg/
expression.rs

1use crate::peg::grammar::PEG;
2use crate::peg::parsing::{Capture, ParserError, ParserErrorCode, ParserOutput, Token};
3use log::debug;
4use std::collections::HashSet;
5use std::fmt::{Display, Formatter};
6use std::rc::Rc;
7
8// might need inner structs
9#[derive(Clone, Debug)]
10pub enum Expression {
11    Empty,
12    Any,
13    Literal(String),
14    NonTerminal(String),   // references a rule
15    Range(String, String),
16    Class(HashSet<String>),
17    Group(Rc<Box<Expression>>),
18    ZeroOrMore(Rc<Box<Expression>>),
19    OneOrMore(Rc<Box<Expression>>),
20    Optional(Rc<Box<Expression>>),
21    And(Rc<Box<Expression>>),
22    Not(Rc<Box<Expression>>),
23    Choice(Vec<Expression>), // generalizing it to any number of choices
24    Sequence(Vec<Expression>), // generalizing it to any number of sequences
25}
26
27impl Expression {
28    pub fn into_rc_box(self) -> Rc<Box<Expression>> {
29        Rc::new(Box::new(self))
30    }
31
32    pub fn make_class(symbols: &[&str]) -> Expression {
33        let mut class: HashSet<String> = Default::default();
34        for s in symbols.iter().map(|c| c.to_string()) {
35            class.insert(s);
36        }
37
38        Expression::Class(class)
39    }
40
41    pub fn parse(&self, peg: &PEG, input: &str, cursor: usize, depth: usize) -> ParserOutput {
42        match self {
43            Expression::Empty => ParserOutput(1, cursor, Ok(vec![])),
44            Expression::Any => {
45                if cursor < input.len() {
46                    ParserOutput(
47                        1,
48                        cursor + 1,
49                        Ok(vec![Token::Terminal(Capture(cursor, cursor + 1))]),
50                    )
51                } else {
52                    ParserOutput(
53                        1,
54                        cursor,
55                        Err(ParserError {
56                            position: cursor,
57                            expression: self.clone(),
58                            error: ParserErrorCode::UnexpectedEndOfInput,
59                            cause: None,
60                        }),
61                    )
62                }
63            }
64            Expression::Literal(pattern) => {
65                let input = &input[cursor..];
66                let len = pattern.len();
67                if input.starts_with(pattern) {
68                    ParserOutput(
69                        len as u32,
70                        cursor + len,
71                        Ok(vec![Token::Terminal(Capture(cursor, cursor + len))]),
72                    )
73                } else {
74                    // eh, not quite what I wrote above, but close enough (cost is potentially too large)...
75                    ParserOutput(
76                        len as u32,
77                        cursor,
78                        Err(ParserError {
79                            position: cursor,
80                            expression: self.clone(),
81                            error: ParserErrorCode::ExpressionDoesNotMatch,
82                            cause: None,
83                        }),
84                    )
85                }
86            }
87            Expression::NonTerminal(nt) => {
88                let rule = peg
89                    .rules
90                    .get(nt)
91                    .expect(format!("Encountered unknown NonTerminal symbol: {}", nt).as_str());
92
93                debug!("{}parsing {nt} @ {cursor}: {rule}", "│".repeat(depth));
94
95                let ParserOutput(cost, new_cursor, res) = rule.parse(peg, input, cursor, depth + 1);
96
97                let res = match res {
98                    Ok(matches) => ParserOutput(
99                        cost,
100                        new_cursor,
101                        Ok(vec![Token::NonTerminal(
102                            nt.clone(),
103                            Capture(cursor, new_cursor),
104                            matches,
105                        )]),
106                    ),
107                    Err(inner) => ParserOutput(
108                        cost,
109                        cursor,
110                        Err(ParserError {
111                            position: cursor,
112                            expression: self.clone(),
113                            error: ParserErrorCode::NonTerminalDoesNotMatch,
114                            cause: Some(Box::from(inner)),
115                        }),
116                    ),
117                };
118
119                debug!(
120                    "{}└{} parsing {nt} @ {cursor} advanced to {new_cursor}",
121                    "│".repeat(depth),
122                    if res.2.is_ok() { "success" } else { "failure" }
123                );
124
125                res
126            }
127            Expression::Range(from, to) => {
128                // test this!
129                if (from.as_str()..to.as_str()).contains(&&input[cursor..]) {
130                    // per paper this should have been desugared into a choice and the cost would depend on
131                    // the ordering in the choice, but this can be done in constant time...
132                    ParserOutput(
133                        1,
134                        cursor + 1,
135                        Ok(vec![Token::Terminal(Capture(cursor, cursor + 1))]),
136                    )
137                } else {
138                    ParserOutput(
139                        1,
140                        cursor,
141                        Err(ParserError {
142                            position: cursor,
143                            expression: self.clone(),
144                            error: ParserErrorCode::ExpressionDoesNotMatch,
145                            cause: None,
146                        }),
147                    )
148                }
149            }
150            Expression::Class(class) => {
151                let input = &input[cursor..];
152                let mut cost = 1;
153                for symbol in class {
154                    cost += symbol.len();
155                    if input.starts_with(symbol) {
156                        return ParserOutput(
157                            cost as u32,
158                            cursor + symbol.len(),
159                            Ok(vec![Token::Terminal(Capture(
160                                cursor,
161                                cursor + symbol.len(),
162                            ))]),
163                        );
164                    }
165                }
166                ParserOutput(
167                    cost as u32,
168                    cursor,
169                    Err(ParserError {
170                        position: cursor,
171                        expression: self.clone(),
172                        error: ParserErrorCode::ExpressionDoesNotMatch,
173                        cause: None,
174                    }),
175                )
176            }
177            Expression::Group(expression) => {
178                let ParserOutput(cost, cursor, res) = expression.parse(peg, input, cursor, depth);
179                ParserOutput(cost + 1, cursor, res)
180            }
181            Expression::ZeroOrMore(expression) => {
182                let mut captures = vec![];
183                let mut new_cursor = cursor;
184                let mut running_cost = 1;
185                loop {
186                    let ParserOutput(cost, downstream_cursor, res) =
187                        expression.parse(peg, input, new_cursor, depth);
188                    running_cost += cost;
189                    match res {
190                        Ok(mut matches) => {
191                            new_cursor = downstream_cursor;
192                            captures.append(&mut matches);
193                        }
194                        Err(_) => {
195                            break ParserOutput(running_cost, new_cursor, Ok(captures));
196                        }
197                    }
198                }
199            }
200            Expression::OneOrMore(expression) => {
201                let desugared = Expression::Sequence(vec![
202                    Expression::Group(expression.clone()),
203                    Expression::ZeroOrMore(expression.clone()),
204                ]);
205                desugared.parse(peg, input, cursor, depth)
206            }
207            Expression::Optional(expression) => {
208                let ParserOutput(cost, cursor, res) = expression.parse(peg, input, cursor, depth);
209                match res {
210                    Ok(matches) => ParserOutput(cost + 1, cursor, Ok(matches)),
211                    Err(_) => ParserOutput(cost + 1, cursor, Ok(vec![])),
212                }
213            }
214            Expression::And(peek) => {
215                let ParserOutput(cost, _, res) = peek.parse(peg, input, cursor, depth);
216                match res {
217                    Ok(_) => ParserOutput(cost + 1, cursor, Ok(vec![])),
218                    Err(inner) => ParserOutput(
219                        cost + 1,
220                        cursor,
221                        Err(ParserError {
222                            position: cursor,
223                            expression: self.clone(),
224                            error: ParserErrorCode::ExpressionDoesNotMatch,
225                            cause: Some(Box::from(inner)),
226                        }),
227                    ),
228                }
229            }
230            Expression::Not(peek) => {
231                let ParserOutput(cost, _, res) = peek.parse(peg, input, cursor, depth);
232                match res {
233                    Ok(inner) => ParserOutput(
234                        cost + 1,
235                        cursor,
236                        Err(ParserError {
237                            position: cursor,
238                            expression: self.clone(),
239                            error: ParserErrorCode::NotDidMatch(inner),
240                            cause: None,
241                        }),
242                    ),
243                    Err(_) => ParserOutput(cost + 1, cursor, Ok(vec![])),
244                }
245            }
246            Expression::Choice(choices) => {
247                let mut running_cost = 1;
248                for expression in choices {
249                    let ParserOutput(cost, cursor, res) =
250                        expression.parse(peg, input, cursor, depth);
251                    running_cost += cost;
252                    if let Ok(res) = res {
253                        return ParserOutput(running_cost, cursor, Ok(res));
254                    }
255                }
256
257                ParserOutput(
258                    running_cost,
259                    cursor,
260                    Err(ParserError {
261                        position: cursor,
262                        expression: self.clone(),
263                        error: ParserErrorCode::ExpressionDoesNotMatch,
264                        cause: None,
265                    }),
266                )
267            }
268            Expression::Sequence(sequence) => {
269                let mut running_cost = 1;
270                let mut captures: Vec<Token> = vec![];
271                let mut new_cursor = cursor;
272
273                for expression in sequence {
274                    let ParserOutput(cost, cursor, res) =
275                        expression.parse(peg, input, new_cursor, depth);
276                    running_cost += cost;
277                    match res {
278                        Ok(mut res) => {
279                            captures.append(&mut res);
280                            new_cursor = cursor;
281                        }
282                        Err(inner) => {
283                            return ParserOutput(
284                                running_cost,
285                                cursor,
286                                Err(ParserError {
287                                    position: cursor,
288                                    expression: self.clone(),
289                                    error: ParserErrorCode::ExpressionDoesNotMatch,
290                                    cause: Some(Box::from(inner)),
291                                }),
292                            );
293                        }
294                    }
295                }
296
297                ParserOutput(running_cost, new_cursor, Ok(captures))
298            }
299        }
300    }
301}
302
303fn escape_literal(literal: &String) -> String {
304    literal
305        .replace("\\", "\\\\")
306        .replace("\\\\0", "\\0")
307        .replace("\\\\1", "\\1")
308        .replace("\\\\2", "\\2")
309        .replace("\\\\3", "\\3")
310        .replace("\\\\4", "\\4")
311        .replace("\\\\5", "\\5")
312        .replace("\\\\6", "\\6")
313        .replace("\\\\7", "\\7")
314        .replace("\\\\8", "\\8")
315        .replace("\\\\9", "\\9")
316        .replace("\r", "\\r")
317        .replace("\t", "\\t")
318        .replace("\n", "\\n")
319}
320
321impl Display for Expression {
322    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
323        match self {
324            Expression::Empty => write!(f, "()"),
325            Expression::Any => write!(f, "."),
326            Expression::Literal(literal) => {
327                write!(f, "'{}'", escape_literal(literal).replace("'", "\\'"))
328            }
329            Expression::NonTerminal(token) => write!(f, "{}", token),
330            Expression::Range(a, b) => write!(f, "[{}-{}]", a, b), // there is no way to output [az-x] atm, just a way to parse it
331            Expression::Class(class) => {
332                write!(
333                    f,
334                    "[{}]",
335                    class
336                        .iter()
337                        .map(|c| escape_literal(c).replace("[", "\\[").replace("]", "\\]"))
338                        .collect::<Vec<_>>()
339                        .join("")
340                )
341            }
342            Expression::Group(group) => write!(f, "({})", group),
343            Expression::ZeroOrMore(inner) => write!(f, "{}*", inner),
344            Expression::OneOrMore(inner) => write!(f, "{}+", inner),
345            Expression::Optional(inner) => write!(f, "{}?", inner),
346            Expression::And(inner) => write!(f, "&{}", inner),
347            Expression::Not(inner) => write!(f, "!{}", inner),
348            Expression::Choice(choices) => {
349                if choices.len() > 1 {
350                    write!(
351                        f,
352                        "({})",
353                        choices
354                            .iter()
355                            .map(|c| format!("{}", c))
356                            .collect::<Vec<_>>()
357                            .join(" / ")
358                    )
359                } else {
360                    write!(
361                        f,
362                        "{}",
363                        choices
364                            .iter()
365                            .map(|c| format!("{}", c))
366                            .collect::<Vec<_>>()
367                            .join(" / ")
368                    )
369                }
370            }
371            Expression::Sequence(sequence) => {
372                if sequence.len() > 1 {
373                    write!(
374                        f,
375                        "({})",
376                        sequence
377                            .iter()
378                            .map(|c| format!("{}", c))
379                            .collect::<Vec<_>>()
380                            .join(" ")
381                    )
382                } else {
383                    write!(
384                        f,
385                        "{}",
386                        sequence
387                            .iter()
388                            .map(|c| format!("{}", c))
389                            .collect::<Vec<_>>()
390                            .join(" ")
391                    )
392                }
393            }
394        }
395    }
396}