aoc_parse/
context.rs

1//! Mainly error tracking for the overall parse.
2
3use std::{any::Any, collections::HashMap};
4
5use crate::ParseError;
6
/// Error type for when an error has been reported to ParseContext.
///
/// This is a plain marker: it carries no data of its own. The details of the
/// failure are recorded in the `ParseContext` that produced it.
///
/// It's OK to discard this kind of error and return success. See
/// `ParseContext` for an example.
///
/// `Debug` is implemented so that `Result<T, Reported>` can be used with
/// `unwrap`, `expect`, and the assertion macros.
#[derive(Debug)]
pub struct Reported;
12
/// Contains the source text we're parsing and tracks errors.
///
/// We track errors in the ParseContext, not Results, because often a parser
/// produces both a successful match *and* the error that will later prove to
/// be the best error message for the overall parse attempt.
///
/// # The `:wq` example
///
/// For example, consider `parser!(line(u32)+)` parsing the following input:
///
/// ```text
/// 1
/// 2
/// 3
/// 4:wq
/// 5
/// ```
///
/// Clearly, someone accidentally typed `:wq` on line 4. But what will happen
/// here is that `line(u32)+` successfully matches the first 3 lines. If we
/// don't track the error we encountered when trying to parse `4:wq` as a
/// `u32`, but content ourselves with the successful match `line(u32)+`
/// produces, the best error message we can ultimately produce is something
/// like "found extra text after a successful match at line 4, column 1".
///
/// Here's how we now handle these success-failures:
///
/// -   `u32` returns success, matching `4`.
///
/// -   `line(u32)` reports an error to the context (at line 4 column 2) and
///     returns `Reported`, because `u32` didn't match the entire line.
///
/// -   `line(u32)+` then *discards* the `Reported` error, backtracks,
///     and returns a successful match for the first 3 lines.
///
/// -   The top-level parser sees that `line(u32)+` didn't match the entire
///     input, and reports an error to the context at line 4 column 1.
///     But we already have a previous error where we had got further,
///     so this error is ignored.
///
/// -   The error at line 4 column 2 is taken from the context and returned to
///     the user.
///
/// # Rationale: Design alternatives
///
/// To implement this without `ParseContext`, we could have implemented a
/// `TigerResult<T, E>` type that can be `Ok(T)`, `Err(E)`, or `OkBut(T, E)`,
/// the last containing *both* a success value *and* an excuse explaining why
/// we did not succeed even more. The forwardmost error would be propagated
/// there instead of being stored in the context. We would use `TigerResult<T,
/// ParseError>` instead of `Result<T, Reported>` everywhere. Both ways have
/// advantages. Both are pretty weird for Rust. The way of the context is
/// something I've wanted to explore in Rust; and it lets us keep using the `?`
/// try operator.
pub struct ParseContext<'parse> {
    /// The text being parsed.
    source: &'parse str,
    /// The reported error with the farthest-forward location, or `None` if
    /// nothing has been reported to this context yet.
    foremost_error: Option<ParseError>,
    /// Rule sets registered with this context, keyed by rule set id.
    rule_sets: HashMap<usize, &'parse [Box<dyn Any>]>,
}
72
73impl<'parse> ParseContext<'parse> {
74    /// Create a `ParseContext` to parse the given input.
75    pub fn new(source: &'parse str) -> Self {
76        ParseContext {
77            source,
78            foremost_error: None,
79            rule_sets: HashMap::new(),
80        }
81    }
82
83    /// The text being parsed.
84    pub fn source(&self) -> &'parse str {
85        self.source
86    }
87
88    /// Extract the error. Use this only after receiving `Reported` from an
89    /// operation on the context.
90    ///
91    /// # Panics
92    ///
93    /// If no error has been reported on this context.
94    pub fn into_reported_error(self) -> ParseError {
95        self.foremost_error
96            .expect("a parse error should have been reported")
97    }
98
99    /// Create a temporary child context for parsing a slice of `self.source`.
100    /// Invoke the given closure `f` with that temporary context. Propagate
101    /// errors to `self`.
102    ///
103    /// Rule sets registered while running `f` are retained in `self`.
104    /// They're cheap enough.
105    ///
106    /// The `'parse` lifetime of the nested context is the same as for `self`,
107    /// not narrower. That's the lifetime of `source`, which is the same for
108    /// the slice as for the whole.
109    pub(crate) fn with_slice<F, T>(&mut self, start: usize, end: usize, f: F) -> Result<T, Reported>
110    where
111        F: for<'a> FnOnce(&'a mut Self) -> Result<T, Reported>,
112    {
113        let mut inner_context = ParseContext {
114            source: &self.source[start..end],
115            foremost_error: None,
116            rule_sets: HashMap::new(),
117        };
118
119        std::mem::swap(&mut self.rule_sets, &mut inner_context.rule_sets);
120
121        let r = f(&mut inner_context);
122
123        std::mem::swap(&mut self.rule_sets, &mut inner_context.rule_sets);
124
125        if r.is_err() {
126            self.report(
127                inner_context
128                    .into_reported_error()
129                    .adjust_location(self.source, start),
130            );
131        }
132        r
133    }
134
135    /// Record an error.
136    ///
137    /// Currently a ParseContext only tracks the foremost error. That is, if
138    /// `err.location` is farther forward than any other error we've
139    /// encountered, we store it. Otherwise discard it.
140    ///
141    /// Nontrivial patterns try several different things. If anything succeeds,
142    /// we get a match. We only fail if every branch leads to failure. This
143    /// means that by the time matching fails, we have an abundance of
144    /// different error messages. Generally the error we want is the one where
145    /// we progressed as far as possible through the input string before
146    /// failing.
147    pub fn report(&mut self, err: ParseError) -> Reported {
148        if Some(err.location) > self.foremost_error.as_ref().map(|err| err.location) {
149            self.foremost_error = Some(err);
150        }
151        Reported
152    }
153
154    /// Record a `foo expected` error.
155    pub fn error_expected(&mut self, start: usize, expected: &str) -> Reported {
156        self.report(ParseError::new_expected(self.source(), start, expected))
157    }
158
159    /// Record an error when `FromStr::from_str` fails.
160    pub fn error_from_str_failed(
161        &mut self,
162        start: usize,
163        end: usize,
164        type_name: &'static str,
165        message: String,
166    ) -> Reported {
167        self.report(ParseError::new_from_str_failed(
168            self.source(),
169            start,
170            end,
171            type_name,
172            message,
173        ))
174    }
175
176    /// Record an "extra unparsed text after match" error.
177    pub fn error_extra(&mut self, location: usize) -> Reported {
178        self.report(ParseError::new_extra(self.source(), location))
179    }
180
181    pub(crate) fn register_rule_set(
182        &mut self,
183        rule_set_id: usize,
184        rule_parsers: &'parse [Box<dyn Any>],
185    ) {
186        self.rule_sets.insert(rule_set_id, rule_parsers);
187    }
188
189    pub(crate) fn fetch_parser_for_rule(
190        &self,
191        rule_set_id: usize,
192        index: usize,
193    ) -> &'parse dyn Any {
194        let rule_parsers: &'parse [Box<dyn Any>] = self
195            .rule_sets
196            .get(&rule_set_id)
197            .expect("internal error: rule set not registered");
198        &*rule_parsers[index]
199    }
200}
201
#[cfg(test)]
mod tests {
    use crate::parsers::{lines, sections, u64};
    use crate::testing::*;

    /// A stray `:wq` after a number should be reported where the number
    /// ends, both for plain lines and for lines nested inside sections.
    #[test]
    fn test_repeat_region_errors() {
        // Plain lines: the stray text follows the number on line 4.
        let stray_text_in_lines = "\
            194832\n\
            2094235\n\
            374274\n\
            4297534:wq\n\
            59842843\n\
        ";
        let line_parser = lines(u64);
        assert_parse_error(
            &line_parser,
            stray_text_in_lines,
            "matched part of the line, but not all of it at line 4 column 8",
        );

        // Lines within sections: the stray text is on line 6 of the input,
        // inside the second section.
        let stray_text_in_sections = "\
            1\n\
            \n\
            3\n\
            4\n\
            5\n\
            6:wq\n\
            \n\
            8\n\
            9\n\
        ";
        let section_parser = sections(lines(u64));
        assert_parse_error(
            &section_parser,
            stray_text_in_sections,
            "matched part of the line, but not all of it at line 6 column 2",
        );
    }
}