1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
//! Mainly error tracking for the overall parse.

use std::{any::Any, collections::HashMap};

use crate::ParseError;

/// Marker error meaning "an error has already been reported to the
/// `ParseContext`".
///
/// This type carries no data of its own: `ParseContext::report` stores the
/// actual `ParseError` in the context and hands back a `Reported` value.
///
/// It's OK to discard this kind of error and return success. See
/// `ParseContext` for an example.
pub struct Reported;

/// Contains the source text we're parsing and tracks errors.
///
/// We track errors in the ParseContext, not Results, because often a parser
/// produces both a successful match *and* the error that will later prove to
/// be the best error message for the overall parse attempt.
///
/// # The `:wq` example
///
/// For example, consider `parser!(line(u32)+)` parsing the following input:
///
/// ```text
/// 1
/// 2
/// 3
/// 4:wq
/// 5
/// ```
///
/// Clearly, someone accidentally typed `:wq` on line 4. But what will happen
/// here is that `line(u32)+` successfully matches the first 3 lines. If we
/// don't track the error we encountered when trying to parse `4:wq` as a
/// `u32`, but content ourselves with the successful match `line(u32)+`
/// produces, the best error message we can ultimately produce is something
/// like "found extra text after a successful match at line 4, column 1".
///
/// Here's how we now handle these success-failures:
///
/// -   `u32` returns success, matching `4`.
///
/// -   `line(u32)` reports an error to the context (at line 4 column 2) and
///     returns `Reported`, because `u32` didn't match the entire line.
///
/// -   `line(u32)+` then *discards* the `Reported` error, backtracks,
///     and returns a successful match for the first 3 lines.
///
/// -   The top-level parser sees that `line(u32)+` didn't match the entire
///     input, and reports an error to the context at line 4 column 1.
///     But we already have a previous error where we had got further,
///     so this error is ignored.
///
/// -   The error at line 4 column 2 is taken from the context and returned to
///     the user.
///
/// # Rationale: Design alternatives
///
/// To implement this without `ParseContext`, we could have implemented a
/// `TigerResult<T, E>` type that can be `Ok(T)`, `Err(E)`, or `OkBut(T, E)`,
/// the last containing *both* a success value *and* an excuse explaining why
/// we did not succeed even more. The forwardmost error would be propagated
/// there instead of being stored in the context. We would use `TigerResult<T,
/// ParseError>` instead of `Result<T, Reported>` everywhere. Both ways have
/// advantages. Both are pretty weird for Rust. The way of the context is
/// something I've wanted to explore in Rust; and it lets us keep using the `?`
/// try operator.
pub struct ParseContext<'parse> {
    /// The text being parsed by this context.
    source: &'parse str,
    /// The reported error with the farthest-forward location so far, if any
    /// error has been reported.
    foremost_error: Option<ParseError>,
    /// Registered rule sets, keyed by rule set id. Each entry is the slice of
    /// type-erased parsers for that rule set.
    rule_sets: HashMap<usize, &'parse [Box<dyn Any>]>,
}

impl<'parse> ParseContext<'parse> {
    /// Build a fresh context over `source`, with no error recorded and no
    /// rule sets registered.
    pub fn new(source: &'parse str) -> Self {
        Self {
            source,
            foremost_error: None,
            rule_sets: HashMap::new(),
        }
    }

    /// The text being parsed.
    pub fn source(&self) -> &'parse str {
        self.source
    }

    /// Consume the context and hand back the error it recorded. Call this
    /// only after an operation on the context has returned `Reported`.
    ///
    /// # Panics
    ///
    /// If no error has been reported on this context.
    pub fn into_reported_error(self) -> ParseError {
        match self.foremost_error {
            Some(recorded) => recorded,
            None => panic!("a parse error should have been reported"),
        }
    }

    /// Run `f` against a temporary child context whose source is the slice
    /// `self.source[start..end]`, and return whatever `f` returns. If `f`
    /// fails, the child's error is re-reported on `self`.
    ///
    /// The child borrows `self`'s rule sets for the duration of the call, and
    /// anything it registers along the way is kept in `self` afterwards.
    /// They're cheap enough.
    ///
    /// The child's `'parse` lifetime is the same as `self`'s, not a narrower
    /// one: it is the lifetime of `source`, and a slice of the source lives
    /// exactly as long as the whole.
    ///
    /// # Panics
    ///
    /// If `start..end` is not a valid range of `self.source`, or if `f`
    /// returns `Err(Reported)` without having reported an error on the child
    /// context.
    pub(crate) fn with_slice<F, T>(&mut self, start: usize, end: usize, f: F) -> Result<T, Reported>
    where
        F: for<'a> FnOnce(&'a mut Self) -> Result<T, Reported>,
    {
        // Move our rule sets into the child (leaving an empty map behind) so
        // that lookups and registrations inside `f` see a single table.
        let mut child = Self {
            source: &self.source[start..end],
            foremost_error: None,
            rule_sets: std::mem::take(&mut self.rule_sets),
        };

        let outcome = f(&mut child);

        // Reclaim the table, including anything `f` registered.
        self.rule_sets = std::mem::take(&mut child.rule_sets);

        outcome.map_err(|reported| {
            // The child's error was built against the slice; adjust it
            // against the full source and the slice's start offset before
            // recording it here.
            let translated = child
                .into_reported_error()
                .adjust_location(self.source, start);
            self.report(translated);
            reported
        })
    }

    /// Record an error.
    ///
    /// A ParseContext currently remembers only the foremost error: `err` is
    /// stored if its `location` is strictly farther forward than that of the
    /// error already held (or if none is held yet), and dropped otherwise.
    ///
    /// Nontrivial patterns try several different things, and matching fails
    /// only when every branch has failed. By then there is an abundance of
    /// candidate error messages; generally the one we want is from the
    /// attempt that progressed farthest through the input before failing.
    pub fn report(&mut self, err: ParseError) -> Reported {
        let is_farther = self
            .foremost_error
            .as_ref()
            .map_or(true, |current| err.location > current.location);
        if is_farther {
            self.foremost_error = Some(err);
        }
        Reported
    }

    /// Record a `foo expected` error.
    pub fn error_expected(&mut self, start: usize, expected: &str) -> Reported {
        let err = ParseError::new_expected(self.source, start, expected);
        self.report(err)
    }

    /// Record an error when `FromStr::from_str` fails.
    pub fn error_from_str_failed(
        &mut self,
        start: usize,
        end: usize,
        type_name: &'static str,
        message: String,
    ) -> Reported {
        let err = ParseError::new_from_str_failed(self.source, start, end, type_name, message);
        self.report(err)
    }

    /// Record an "extra unparsed text after match" error.
    pub fn error_extra(&mut self, location: usize) -> Reported {
        let err = ParseError::new_extra(self.source, location);
        self.report(err)
    }

    /// Store `rule_parsers` under `rule_set_id`, replacing any slice that was
    /// previously registered under the same id.
    pub(crate) fn register_rule_set(
        &mut self,
        rule_set_id: usize,
        rule_parsers: &'parse [Box<dyn Any>],
    ) {
        let _replaced = self.rule_sets.insert(rule_set_id, rule_parsers);
    }

    /// Look up the type-erased parser at position `index` within the rule
    /// set registered as `rule_set_id`.
    ///
    /// # Panics
    ///
    /// If no rule set is registered under `rule_set_id`, or if `index` is out
    /// of bounds for that rule set.
    pub(crate) fn fetch_parser_for_rule(
        &self,
        rule_set_id: usize,
        index: usize,
    ) -> &'parse dyn Any {
        let registered: Option<&'parse [Box<dyn Any>]> =
            self.rule_sets.get(&rule_set_id).copied();
        match registered {
            Some(parsers) => &*parsers[index],
            None => panic!("internal error: rule set not registered"),
        }
    }
}

#[cfg(test)]
mod tests {
    use crate::parsers::{lines, sections, u64};
    use crate::testing::*;

    /// A stray `:wq` in the middle of repeated lines must be reported at the
    /// point where the line stopped matching, not at the start of the
    /// unmatched remainder.
    #[test]
    fn test_repeat_region_errors() {
        // Flat list of numbers; line 4 has trailing junk after the number.
        let flat_input = concat!(
            "194832\n",
            "2094235\n",
            "374274\n",
            "4297534:wq\n",
            "59842843\n",
        );
        let line_parser = lines(u64);
        assert_parse_error(
            &line_parser,
            flat_input,
            "matched part of the line, but not all of it at line 4 column 8",
        );

        // Same mistake, this time inside the second of three blank-line
        // separated sections.
        let sectioned_input = concat!(
            "1\n",
            "\n",
            "3\n",
            "4\n",
            "5\n",
            "6:wq\n",
            "\n",
            "8\n",
            "9\n",
        );
        let section_parser = sections(lines(u64));
        assert_parse_error(
            &section_parser,
            sectioned_input,
            "matched part of the line, but not all of it at line 6 column 2",
        );
    }
}