lwb_parser/parser/peg/
parser_core_expression.rs

1#![allow(clippy::result_unit_err)]
2
3use crate::parser::peg::parse_error::{Expect, PEGParseError};
4use crate::parser::peg::parse_result::ParseResult;
5use crate::parser::peg::parser_core::{ParserContext, ParserState};
6use crate::parser::peg::parser_core_ast::{CoreExpression, CoreSort, ParsePairRaw};
7use crate::parser::peg::parser_sugar_ast::Annotation;
8use crate::sources::source_file::SourceFileIterator;
9use crate::sources::span::Span;
10
11pub struct ExpressionContext<'src> {
12    pub name: Option<&'src str>,
13    pub error: Option<&'src String>,
14}
15
16impl<'src> ExpressionContext<'src> {
17    pub fn empty() -> Self {
18        Self {
19            name: None,
20            error: None,
21        }
22    }
23}
24
25/// Given an expression and the current position, attempts to parse this constructor.
26pub fn parse_expression_name<'src>(
27    state: &ParserContext<'src>,
28    cache: &mut ParserState<'src>,
29    expr_name: &'src str,
30    pos: SourceFileIterator<'src>,
31) -> ParseResult<'src, ParsePairRaw> {
32    let sort: &'src CoreSort = state
33        .ast
34        .sorts
35        .get(expr_name)
36        .expect("name is guaranteed to exist");
37
38    let expr = &sort.expr;
39    let sort_context = ExpressionContext {
40        name: Some(sort.name),
41        error: sort.annotations.iter().find_map(|i| {
42            if let Annotation::Error(e) = i {
43                Some(e)
44            } else {
45                None
46            }
47        }),
48    };
49
50    //Check if this result is cached
51    let key = (pos.position(), expr_name);
52    if let Some(cached) = cache.get_mut(&key) {
53        return cached.clone();
54    }
55
56    //Before executing, put a value for the current position in the cache.
57    //This value is used if the rule is left-recursive
58    let cache_state = cache.state_current();
59    cache.insert(
60        key,
61        ParseResult::new_err(
62            ParsePairRaw::Error(Span::from_length(state.file, pos.position(), 0)),
63            pos.clone(),
64            pos.clone(),
65        ),
66    );
67
68    //Now execute the actual rule, taking into account left recursion
69    //The way this is done is heavily inspired by http://web.cs.ucla.edu/~todd/research/pepm08.pdf
70    //A quick summary
71    //- First put an error value for the current (rule, position) in the cache
72    //- Try to parse the current (rule, position). If this fails, there is definitely no left recursion. Otherwise, we now have a seed.
73    //- Put the new seed in the cache, and rerun on the current (rule, position). Make sure to revert the cache to the previous state.
74    //- At some point, the above will fail. Either because no new input is parsed, or because the entire parse now failed. At this point, we have reached the maximum size.
75    let mut res = parse_expression(state, cache, expr, pos.clone(), &sort_context);
76    let res = if res.ok {
77        //Do we have a leftrec case?
78        if !cache.is_read(&key).unwrap() {
79            //There was no leftrec, just return the value
80            res
81        } else {
82            //There was leftrec, we need to grow the seed
83            loop {
84                //Insert the current seed into the cache
85                cache.state_revert(cache_state);
86                cache.insert(key, res.clone());
87
88                //Grow the seed
89                let new_res = parse_expression(state, cache, expr, pos.clone(), &sort_context);
90                if !new_res.ok {
91                    break;
92                }
93                if new_res.pos.position() <= res.pos.position() {
94                    break;
95                }
96                res = new_res;
97            }
98            //The seed is at its maximum size
99            cache.insert(key, res.clone());
100            res
101        }
102    } else {
103        // Left recursion value was used, but did not make a seed.
104        // This is an illegal grammar!
105        if cache.is_read(&key).unwrap() {
106            cache.add_error(PEGParseError::fail_left_recursion(Span::from_length(
107                state.file,
108                pos.position(),
109                0,
110            )));
111        }
112        res
113    };
114
115    cache.insert(key, res.clone());
116
117    //Return result
118    res
119}
120
121pub fn parse_expression<'src>(
122    state: &ParserContext<'src>,
123    cache: &mut ParserState<'src>,
124    expr: &'src CoreExpression,
125    mut pos: SourceFileIterator<'src>,
126    sort_context: &ExpressionContext<'src>,
127) -> ParseResult<'src, ParsePairRaw> {
128    match expr {
129        //To parse a sort, call parse_sort recursively.
130        CoreExpression::Name(sort_name) => {
131            let res = parse_expression_name(state, cache, sort_name, pos);
132            res.map(|s| ParsePairRaw::Name(s.span(), Box::new(s)))
133        }
134        //To parse a character class, check if the character is accepted, and make an ok/error based on that.
135        CoreExpression::CharacterClass(characters) => {
136            while cache.allow_layout && !pos.clone().accept(characters) {
137                let (ok, after_layout_pos) =
138                    skip_single_layout(state, cache, pos.clone(), sort_context);
139                if !ok {
140                    break;
141                };
142                pos = after_layout_pos;
143            }
144            let span = Span::from_length(state.file, pos.position(), 1);
145            if pos.accept(characters) {
146                if cache.no_layout_nest_count > 0 {
147                    cache.allow_layout = false;
148                }
149                ParseResult::new_ok(ParsePairRaw::Empty(span), pos.clone(), pos, false)
150            } else {
151                cache.add_error(PEGParseError::expect(
152                    span.clone(),
153                    Expect::ExpectCharClass(characters.clone()),
154                    sort_context,
155                ));
156                ParseResult::new_err(ParsePairRaw::Error(span), pos.clone(), pos)
157            }
158        }
159        //To parse a sequence, parse each constructor in the sequence.
160        //The results are added to `results`, and the best error and position are updated each time.
161        //Finally, construct a `ParsePairConstructor::List` with the results.
162        CoreExpression::Sequence(subexprs) => {
163            let mut results = vec![];
164            let start_pos = pos.position();
165            let mut pos_err = pos.clone();
166            let mut recovered = false;
167
168            //Parse all subconstructors in sequence
169            for (i, subexpr) in subexprs.iter().enumerate() {
170                let res = parse_expression(state, cache, subexpr, pos, sort_context);
171                pos = res.pos;
172                pos_err.max_pos(res.pos_err.clone());
173                results.push(res.result);
174                recovered |= res.recovered;
175                if !res.ok {
176                    if let Some(&offset) = state.errors.get(&res.pos_err.position()) {
177                        //The first token of the sequence can not be skipped, otherwise we can just parse a lot of empty sequences, if a sequence happens in a repeat
178                        if i != 0 && cache.no_errors_nest_count == 0 {
179                            pos = res.pos_err;
180                            //If we're at the end of the file, don't try
181                            if pos.peek().is_none() {
182                                let span = Span::from_end(state.file, start_pos, pos.position());
183                                return ParseResult::new_err(
184                                    ParsePairRaw::List(span, results),
185                                    pos,
186                                    pos_err,
187                                );
188                            }
189                            pos.skip_n(offset);
190                            recovered = true;
191                            continue;
192                        }
193                    }
194
195                    let start_pos = results
196                        .get(0)
197                        .map(|pp| pp.span().position)
198                        .unwrap_or(start_pos);
199                    let span = Span::from_end(state.file, start_pos, pos.position());
200                    return ParseResult::new_err(ParsePairRaw::List(span, results), pos, pos_err);
201                }
202            }
203
204            //Construct result
205            let start_pos = results
206                .get(0)
207                .map(|pp| pp.span().position)
208                .unwrap_or(start_pos);
209            let span = Span::from_end(state.file, start_pos, pos.position());
210            ParseResult::new_ok(ParsePairRaw::List(span, results), pos, pos_err, recovered)
211        }
212        //To parse a sequence, first parse the minimum amount that is needed.
213        //Then keep trying to parse the constructor until the maximum is reached.
214        //The results are added to `results`, and the best error and position are updated each time.
215        //Finally, construct a `ParsePairConstructor::List` with the results.
216        CoreExpression::Repeat { subexpr, min, max } => {
217            let mut results = vec![];
218            let start_pos = pos.position();
219            let mut last_pos = pos.position();
220            let mut pos_err = pos.clone();
221            let mut recovered = false;
222
223            //Parse at most maximum times
224            for i in 0..max.unwrap_or(u64::MAX) {
225                let res =
226                    parse_expression(state, cache, subexpr.as_ref(), pos.clone(), sort_context);
227                pos_err.max_pos(res.pos_err.clone());
228                recovered |= res.recovered;
229
230                if res.ok {
231                    pos = res.pos;
232                    results.push(res.result);
233                } else {
234                    //If we know about this error, try to continue?
235                    //Don't try to continue if we haven't made any progress (already failed on first character), since we will just fail again
236                    //Also don't try to continue if we don't allow errors at the moment, since we don't want to try to recover inside of an no-errors segment
237                    if let Some(&offset) = state.errors.get(&res.pos_err.position()) {
238                        if (offset > 0 || pos.position() != res.pos_err.position())
239                            && cache.no_errors_nest_count == 0
240                        {
241                            pos = res.pos_err.clone();
242                            //If we're at the end of the file, don't try
243                            if pos.peek().is_none() {
244                                let span = Span::from_end(state.file, start_pos, pos.position());
245                                return ParseResult::new_err(
246                                    ParsePairRaw::List(span, results),
247                                    pos,
248                                    pos_err,
249                                );
250                            }
251                            pos.skip_n(offset);
252                            results.push(res.result);
253                            recovered = true;
254                            continue;
255                        }
256                    }
257                    //If we have not yet reached the minimum, we error.
258                    //Otherwise, we break and ok after the loop body.
259                    //In case we reached the minimum, we don't push the error, even though the failure might've been an error.
260                    //This is because it's probably OK, and we want no Error pairs in the parse tree when it's OK.
261                    if i < *min {
262                        pos = res.pos;
263                        results.push(res.result);
264                        let start_pos = results
265                            .get(0)
266                            .map(|pp| pp.span().position)
267                            .unwrap_or(start_pos);
268                        let span = Span::from_end(state.file, start_pos, pos.position());
269                        return ParseResult::new_err(
270                            ParsePairRaw::List(span, results),
271                            pos,
272                            pos_err,
273                        );
274                    } else {
275                        break;
276                    }
277                }
278                //If the position hasn't changed, then we're in an infinite loop
279                if last_pos == pos.position() {
280                    let span = Span::from_length(state.file, pos.position(), 0);
281                    cache.add_error(PEGParseError::fail_loop(span.clone()));
282                    return ParseResult::new_err(ParsePairRaw::List(span, results), pos, pos_err);
283                }
284                last_pos = pos.position();
285            }
286
287            //Construct result
288            let start_pos = results
289                .get(0)
290                .map(|pp| pp.span().position)
291                .unwrap_or(start_pos);
292            let span = Span::from_end(state.file, start_pos, pos.position());
293            ParseResult::new_ok(
294                ParsePairRaw::List(span, results),
295                pos.clone(),
296                pos_err,
297                recovered,
298            )
299        }
300        //To parse a choice, try each constructor, keeping track of the best error that occurred while doing so.
301        //If none of the constructors succeed, we will return this error.
302        CoreExpression::Choice(subexprs) => {
303            //Try each constructor, keeping track of the best error that occurred while doing so.
304            //If none of the constructors succeed, we will return this error.
305            let mut results = vec![];
306            assert!(!subexprs.is_empty());
307            for (i, subexpr) in subexprs.iter().enumerate() {
308                let res = parse_expression(state, cache, subexpr, pos.clone(), sort_context);
309                if res.ok && !res.recovered {
310                    return ParseResult::new_ok(
311                        ParsePairRaw::Choice(res.result.span(), i, Box::new(res.result)),
312                        res.pos,
313                        res.pos_err,
314                        res.recovered,
315                    );
316                }
317                results.push(res);
318            }
319            //Chose best candidate
320            let (i, res) = results
321                .into_iter()
322                .enumerate()
323                .max_by_key(|(_, r)| r.pos_err.position())
324                .unwrap();
325            ParseResult::new(
326                ParsePairRaw::Choice(res.result.span(), i, Box::new(res.result)),
327                res.pos,
328                res.pos_err,
329                res.ok,
330                res.recovered,
331            )
332        }
333        //No layout is parsed by setting the no layout flag during parsing
334        //After the block is completed, if no layout nest count is 0, re-allow layout.
335        CoreExpression::FlagNoLayout(subexpr) => {
336            cache.no_layout_nest_count += 1;
337            let res = parse_expression(state, cache, subexpr, pos, sort_context);
338            cache.no_layout_nest_count -= 1;
339            if cache.no_layout_nest_count == 0 {
340                cache.allow_layout = true;
341            }
342            res
343        }
344        //No errors is parsed by setting the no errors flag during parsing
345        //After the block is completed, is not ok, produce an error.
346        CoreExpression::FlagNoErrors(subexpr, name) => {
347            cache.no_errors_nest_count += 1;
348            let start_pos = pos.position();
349            let res = parse_expression(state, cache, subexpr, pos.clone(), sort_context);
350            cache.no_errors_nest_count -= 1;
351            if !res.ok {
352                let mut next_pos = res.pos.clone();
353                next_pos.skip_n(1);
354                let span = Span::from_end(state.file, start_pos, next_pos.position());
355                let err =
356                    PEGParseError::expect(span, Expect::ExpectSort(name.to_string()), sort_context);
357                cache.add_error(err);
358            }
359            res
360        }
361        CoreExpression::Error(e, msg) => {
362            let pos_backup = pos.clone();
363            let start_pos = pos.position();
364            let mut res = parse_expression(state, cache, e, pos.clone(), sort_context);
365
366            if res.ok {
367                let mut next_pos = res.pos.clone();
368                next_pos.skip_n(1);
369                let span = Span::from_end(state.file, start_pos, next_pos.position());
370                let err =
371                    PEGParseError::expect(span, Expect::Custom(msg.to_string()), sort_context);
372                cache.add_error(err);
373
374                res.pos = pos_backup;
375            }
376            res
377        }
378    }
379}
380
381pub fn skip_single_layout<'src>(
382    state: &ParserContext<'src>,
383    cache: &mut ParserState<'src>,
384    pos: SourceFileIterator<'src>,
385    sort_context: &ExpressionContext<'src>,
386) -> (bool, SourceFileIterator<'src>) {
387    //Automatically make layout rule no-layout and no-errors
388    let prev_allow_layout = cache.allow_layout;
389    cache.no_layout_nest_count += 1;
390    cache.no_errors_nest_count += 1;
391    cache.allow_layout = false;
392
393    let layout_sort = state.ast.sorts.get("layout").expect("Layout exists");
394    let layout_expr = &layout_sort.expr;
395    let layout_res = parse_expression(state, cache, layout_expr, pos.clone(), sort_context);
396
397    cache.no_layout_nest_count -= 1;
398    cache.no_errors_nest_count -= 1;
399    cache.allow_layout = prev_allow_layout;
400
401    (layout_res.ok, layout_res.pos)
402}