1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245
//! Mainly error tracking for the overall parse.
use std::{any::Any, collections::HashMap};
use crate::ParseError;
/// Error type for when an error has been reported to ParseContext.
///
/// This is a unit struct: it carries no details of its own. The error itself
/// is handed to `ParseContext::report`, which returns a `Reported` as a
/// marker, and can be retrieved later with
/// `ParseContext::into_reported_error`.
///
/// It's OK to discard this kind of error and return success. See
/// `ParseContext` for an example.
pub struct Reported;
/// Contains the source text we're parsing and tracks errors.
///
/// We track errors in the ParseContext, not Results, because often a parser
/// produces both a successful match *and* the error that will later prove to
/// be the best error message for the overall parse attempt.
///
/// # The `:wq` example
///
/// For example, consider `parser!(line(u32)+)` parsing the following input:
///
/// ```text
/// 1
/// 2
/// 3
/// 4:wq
/// 5
/// ```
///
/// Clearly, someone accidentally typed `:wq` on line 4. But what will happen
/// here is that `line(u32)+` successfully matches the first 3 lines. If we
/// don't track the error we encountered when trying to parse `4:wq` as a
/// `u32`, but content ourselves with the successful match `line(u32)+`
/// produces, the best error message we can ultimately produce is something
/// like "found extra text after a successful match at line 4, column 1".
///
/// Here's how we now handle these success-failures:
///
/// -   `u32` returns success, matching `4`.
///
/// -   `line(u32)` reports an error to the context (at line 4 column 2) and
///     returns `Reported`, because `u32` didn't match the entire line.
///
/// -   `line(u32)+` then *discards* the `Reported` error, backtracks,
///     and returns a successful match for the first 3 lines.
///
/// -   The top-level parser sees that `line(u32)+` didn't match the entire
///     input, and reports an error to the context at line 4 column 1.
///     But we already have a previous error where we had got further,
///     so this error is ignored.
///
/// -   The error at line 4 column 2 is taken from the context and returned to
///     the user.
///
/// # Rationale: Design alternatives
///
/// To implement this without `ParseContext`, we could have implemented a
/// `TigerResult<T, E>` type that can be `Ok(T)`, `Err(E)`, or `OkBut(T, E)`,
/// the last containing *both* a success value *and* an excuse explaining why
/// we did not succeed even more. The forwardmost error would be propagated
/// there instead of being stored in the context. We would use `TigerResult<T,
/// ParseError>` instead of `Result<T, Reported>` everywhere. Both ways have
/// advantages. Both are pretty weird for Rust. The way of the context is
/// something I've wanted to explore in Rust; and it lets us keep using the `?`
/// try operator.
pub struct ParseContext<'parse> {
// The text being parsed. A child context created by `with_slice` holds a
// sub-slice of its parent's text here.
source: &'parse str,
// The reported error whose location is farthest forward so far, or `None`
// if nothing has been reported yet. Maintained by `report`.
foremost_error: Option<ParseError>,
// Rule parsers registered via `register_rule_set`, keyed by rule set id.
rule_sets: HashMap<usize, &'parse [Box<dyn Any>]>,
}
impl<'parse> ParseContext<'parse> {
    /// Build a fresh context for parsing `source`: no error recorded yet and
    /// no rule sets registered.
    pub fn new(source: &'parse str) -> Self {
        Self {
            rule_sets: HashMap::new(),
            foremost_error: None,
            source,
        }
    }

    /// The text this context is parsing.
    pub fn source(&self) -> &'parse str {
        self.source
    }

    /// Consume the context and return the error it recorded. Call this only
    /// after an operation on the context has returned `Reported`.
    ///
    /// # Panics
    ///
    /// If no error has been reported on this context.
    pub fn into_reported_error(self) -> ParseError {
        let Self { foremost_error, .. } = self;
        foremost_error.expect("a parse error should have been reported")
    }

    /// Run `f` against a temporary child context whose source is
    /// `self.source[start..end]`, and return whatever `f` returns.
    ///
    /// If `f` fails, the child's recorded error is passed through
    /// `adjust_location(self.source, start)` and then reported on `self`.
    /// If `f` succeeds, nothing is reported on `self`.
    ///
    /// The rule-set table is lent to the child for the duration of the call
    /// and taken back afterwards, so rule sets registered while running `f`
    /// are retained in `self`. They're cheap enough.
    ///
    /// The child context uses the same `'parse` lifetime as `self`, not a
    /// narrower one: that is the lifetime of the source text, and a slice of
    /// the text lives exactly as long as the whole.
    ///
    /// # Panics
    ///
    /// If `start..end` is not a valid range for slicing `self.source`, or if
    /// `f` returns an error without having reported one on the child context.
    pub(crate) fn with_slice<F, T>(
        &mut self,
        start: usize,
        end: usize,
        f: F,
    ) -> Result<T, Reported>
    where
        F: for<'a> FnOnce(&'a mut Self) -> Result<T, Reported>,
    {
        // Move our rule sets into the child; `self` holds an empty table
        // while `f` runs.
        let mut child = Self {
            source: &self.source[start..end],
            foremost_error: None,
            rule_sets: std::mem::take(&mut self.rule_sets),
        };

        let outcome = f(&mut child);

        // Take the table back, including anything `f` registered.
        self.rule_sets = std::mem::take(&mut child.rule_sets);

        if outcome.is_err() {
            let err = child
                .into_reported_error()
                .adjust_location(self.source, start);
            self.report(err);
        }
        outcome
    }

    /// Record an error.
    ///
    /// A `ParseContext` currently keeps only the foremost error: `err` is
    /// stored if nothing has been recorded yet or if `err.location` is
    /// strictly farther forward than the stored error's location. Otherwise
    /// `err` is dropped. Either way, `Reported` is returned.
    ///
    /// The reasoning: nontrivial patterns try several alternatives, and
    /// matching fails only when every branch has failed. By that point there
    /// are many candidate error messages, and the one we generally want is
    /// the one from the attempt that got farthest through the input.
    pub fn report(&mut self, err: ParseError) -> Reported {
        let is_new_foremost = match self.foremost_error.as_ref() {
            None => true,
            Some(current) => err.location > current.location,
        };
        if is_new_foremost {
            self.foremost_error = Some(err);
        }
        Reported
    }

    /// Record a `foo expected` error.
    pub fn error_expected(&mut self, start: usize, expected: &str) -> Reported {
        let err = ParseError::new_expected(self.source, start, expected);
        self.report(err)
    }

    /// Record an error when `FromStr::from_str` fails.
    pub fn error_from_str_failed(
        &mut self,
        start: usize,
        end: usize,
        type_name: &'static str,
        message: String,
    ) -> Reported {
        let err = ParseError::new_from_str_failed(self.source, start, end, type_name, message);
        self.report(err)
    }

    /// Record an "extra unparsed text after match" error.
    pub fn error_extra(&mut self, location: usize) -> Reported {
        let err = ParseError::new_extra(self.source, location);
        self.report(err)
    }

    /// Store `rule_parsers` under `rule_set_id`, replacing any slice that was
    /// previously stored under the same id.
    pub(crate) fn register_rule_set(
        &mut self,
        rule_set_id: usize,
        rule_parsers: &'parse [Box<dyn Any>],
    ) {
        self.rule_sets.insert(rule_set_id, rule_parsers);
    }

    /// Look up the parser at position `index` in the rule set registered
    /// under `rule_set_id`.
    ///
    /// # Panics
    ///
    /// If no rule set is registered under `rule_set_id`, or if `index` is out
    /// of bounds for that rule set.
    pub(crate) fn fetch_parser_for_rule(
        &self,
        rule_set_id: usize,
        index: usize,
    ) -> &'parse dyn Any {
        let rule_parsers: &'parse [Box<dyn Any>] = self
            .rule_sets
            .get(&rule_set_id)
            .copied()
            .expect("internal error: rule set not registered");
        let boxed = &rule_parsers[index];
        &**boxed
    }
}
#[cfg(test)]
mod tests {
    use crate::parsers::{lines, sections, u64};
    use crate::testing::*;

    /// A stray `:wq` inside a repeated region must be blamed on the exact
    /// line and column where it appears, both for plain `lines` and for
    /// `lines` nested inside `sections`.
    #[test]
    fn test_repeat_region_errors() {
        // Flat case: the stray text is on line 4 of a list of numbers.
        {
            let line_parser = lines(u64);
            let flat_input = "\
                194832\n\
                2094235\n\
                374274\n\
                4297534:wq\n\
                59842843\n\
            ";
            assert_parse_error(
                &line_parser,
                flat_input,
                "matched part of the line, but not all of it at line 4 column 8",
            );
        }

        // Nested case: the stray text is on line 6, inside the second
        // blank-line-separated section.
        {
            let section_parser = sections(lines(u64));
            let sectioned_input = "\
                1\n\
                \n\
                3\n\
                4\n\
                5\n\
                6:wq\n\
                \n\
                8\n\
                9\n\
            ";
            assert_parse_error(
                &section_parser,
                sectioned_input,
                "matched part of the line, but not all of it at line 6 column 2",
            );
        }
    }
}