aoc_parse/context.rs
1//! Mainly error tracking for the overall parse.
2
3use std::{any::Any, collections::HashMap};
4
5use crate::ParseError;
6
/// Error type for when an error has been reported to ParseContext.
///
/// It's OK to discard this kind of error and return success. See
/// `ParseContext` for an example.
///
/// This is a unit marker: the error details are held by the context the
/// error was reported to, not by this value.
///
/// `Debug` is derived so that `Result<T, Reported>` supports `unwrap()` and
/// `expect()`, and so the marker can appear in debug output.
#[derive(Debug)]
pub struct Reported;
12
/// Contains the source text we're parsing and tracks errors.
///
/// We track errors in the ParseContext, not Results, because often a parser
/// produces both a successful match *and* the error that will later prove to
/// be the best error message for the overall parse attempt.
///
/// # The `:wq` example
///
/// For example, consider `parser!(line(u32)+)` parsing the following input:
///
/// ```text
/// 1
/// 2
/// 3
/// 4:wq
/// 5
/// ```
///
/// Clearly, someone accidentally typed `:wq` on line 4. But what will happen
/// here is that `line(u32)+` successfully matches the first 3 lines. If we
/// don't track the error we encountered when trying to parse `4:wq` as a
/// `u32`, but content ourselves with the successful match `line(u32)+`
/// produces, the best error message we can ultimately produce is something
/// like "found extra text after a successful match at line 4, column 1".
///
/// Here's how we now handle these success-failures:
///
/// -   `u32` returns success, matching `4`.
///
/// -   `line(u32)` reports an error to the context (at line 4 column 2) and
///     returns `Reported`, because `u32` didn't match the entire line.
///
/// -   `line(u32)+` then *discards* the `Reported` error, backtracks,
///     and returns a successful match for the first 3 lines.
///
/// -   The top-level parser sees that `line(u32)+` didn't match the entire
///     input, and reports an error to the context at line 4 column 1.
///     But we already have a previous error where we had got further,
///     so this error is ignored.
///
/// -   The error at line 4 column 2 is taken from the context and returned to
///     the user.
///
/// # Rationale: Design alternatives
///
/// To implement this without `ParseContext`, we could have implemented a
/// `TigerResult<T, E>` type that can be `Ok(T)`, `Err(E)`, or `OkBut(T, E)`,
/// the last containing *both* a success value *and* an excuse explaining why
/// we did not succeed even more. The forwardmost error would be propagated
/// there instead of being stored in the context. We would use `TigerResult<T,
/// ParseError>` instead of `Result<T, Reported>` everywhere. Both ways have
/// advantages. Both are pretty weird for Rust. The way of the context is
/// something I've wanted to explore in Rust; and it lets us keep using the `?`
/// try operator.
pub struct ParseContext<'parse> {
    // The text being parsed. For a child context created by `with_slice`,
    // this is the requested slice of the parent's source.
    source: &'parse str,
    // The error with the farthest-forward location reported so far, if any
    // (see `report`).
    foremost_error: Option<ParseError>,
    // Registered rule sets, keyed by rule set id. Each value is that set's
    // slice of type-erased rule parsers, looked up by index in
    // `fetch_parser_for_rule`.
    rule_sets: HashMap<usize, &'parse [Box<dyn Any>]>,
}
72
73impl<'parse> ParseContext<'parse> {
74 /// Create a `ParseContext` to parse the given input.
75 pub fn new(source: &'parse str) -> Self {
76 ParseContext {
77 source,
78 foremost_error: None,
79 rule_sets: HashMap::new(),
80 }
81 }
82
83 /// The text being parsed.
84 pub fn source(&self) -> &'parse str {
85 self.source
86 }
87
88 /// Extract the error. Use this only after receiving `Reported` from an
89 /// operation on the context.
90 ///
91 /// # Panics
92 ///
93 /// If no error has been reported on this context.
94 pub fn into_reported_error(self) -> ParseError {
95 self.foremost_error
96 .expect("a parse error should have been reported")
97 }
98
99 /// Create a temporary child context for parsing a slice of `self.source`.
100 /// Invoke the given closure `f` with that temporary context. Propagate
101 /// errors to `self`.
102 ///
103 /// Rule sets registered while running `f` are retained in `self`.
104 /// They're cheap enough.
105 ///
106 /// The `'parse` lifetime of the nested context is the same as for `self`,
107 /// not narrower. That's the lifetime of `source`, which is the same for
108 /// the slice as for the whole.
109 pub(crate) fn with_slice<F, T>(&mut self, start: usize, end: usize, f: F) -> Result<T, Reported>
110 where
111 F: for<'a> FnOnce(&'a mut Self) -> Result<T, Reported>,
112 {
113 let mut inner_context = ParseContext {
114 source: &self.source[start..end],
115 foremost_error: None,
116 rule_sets: HashMap::new(),
117 };
118
119 std::mem::swap(&mut self.rule_sets, &mut inner_context.rule_sets);
120
121 let r = f(&mut inner_context);
122
123 std::mem::swap(&mut self.rule_sets, &mut inner_context.rule_sets);
124
125 if r.is_err() {
126 self.report(
127 inner_context
128 .into_reported_error()
129 .adjust_location(self.source, start),
130 );
131 }
132 r
133 }
134
135 /// Record an error.
136 ///
137 /// Currently a ParseContext only tracks the foremost error. That is, if
138 /// `err.location` is farther forward than any other error we've
139 /// encountered, we store it. Otherwise discard it.
140 ///
141 /// Nontrivial patterns try several different things. If anything succeeds,
142 /// we get a match. We only fail if every branch leads to failure. This
143 /// means that by the time matching fails, we have an abundance of
144 /// different error messages. Generally the error we want is the one where
145 /// we progressed as far as possible through the input string before
146 /// failing.
147 pub fn report(&mut self, err: ParseError) -> Reported {
148 if Some(err.location) > self.foremost_error.as_ref().map(|err| err.location) {
149 self.foremost_error = Some(err);
150 }
151 Reported
152 }
153
154 /// Record a `foo expected` error.
155 pub fn error_expected(&mut self, start: usize, expected: &str) -> Reported {
156 self.report(ParseError::new_expected(self.source(), start, expected))
157 }
158
159 /// Record an error when `FromStr::from_str` fails.
160 pub fn error_from_str_failed(
161 &mut self,
162 start: usize,
163 end: usize,
164 type_name: &'static str,
165 message: String,
166 ) -> Reported {
167 self.report(ParseError::new_from_str_failed(
168 self.source(),
169 start,
170 end,
171 type_name,
172 message,
173 ))
174 }
175
176 /// Record an "extra unparsed text after match" error.
177 pub fn error_extra(&mut self, location: usize) -> Reported {
178 self.report(ParseError::new_extra(self.source(), location))
179 }
180
181 pub(crate) fn register_rule_set(
182 &mut self,
183 rule_set_id: usize,
184 rule_parsers: &'parse [Box<dyn Any>],
185 ) {
186 self.rule_sets.insert(rule_set_id, rule_parsers);
187 }
188
189 pub(crate) fn fetch_parser_for_rule(
190 &self,
191 rule_set_id: usize,
192 index: usize,
193 ) -> &'parse dyn Any {
194 let rule_parsers: &'parse [Box<dyn Any>] = self
195 .rule_sets
196 .get(&rule_set_id)
197 .expect("internal error: rule set not registered");
198 &*rule_parsers[index]
199 }
200}
201
#[cfg(test)]
mod tests {
    use crate::parsers::{lines, sections, u64};
    use crate::testing::*;

    /// A stray `:wq` after a number should be reported at the position of the
    /// stray text itself, both for plain lines and for lines inside sections.
    #[test]
    fn test_repeat_region_errors() {
        // Scenario 1: a flat list of lines, with junk after the 4th number.
        {
            let line_parser = lines(u64);

            let flat_input = "\
                194832\n\
                2094235\n\
                374274\n\
                4297534:wq\n\
                59842843\n\
            ";

            assert_parse_error(
                &line_parser,
                flat_input,
                "matched part of the line, but not all of it at line 4 column 8",
            );
        }

        // Scenario 2: blank-line-separated sections of lines, with junk at
        // the end of the second section's last line.
        {
            let section_parser = sections(lines(u64));

            let sectioned_input = "\
                1\n\
                \n\
                3\n\
                4\n\
                5\n\
                6:wq\n\
                \n\
                8\n\
                9\n\
            ";

            assert_parse_error(
                &section_parser,
                sectioned_input,
                "matched part of the line, but not all of it at line 6 column 2",
            );
        }
    }
}