aoc_parse/parsers/
repeat.rs

1//! Parsing a repeated pattern.
2
3use crate::{
4    parsers::{empty, EmptyParser},
5    types::ParserOutput,
6    ParseContext, ParseIter, Parser, Reported, Result,
7};
8
9#[derive(Clone, Copy)]
10pub struct RepeatParser<Pattern, Sep> {
11    pattern: Pattern,
12    min: usize,
13    max: Option<usize>,
14    sep: Sep,
15    sep_is_terminator: bool,
16}
17
18pub struct RepeatParseIter<'parse, Pattern, Sep>
19where
20    Pattern: Parser + 'parse,
21    Sep: Parser + 'parse,
22{
23    params: &'parse RepeatParser<Pattern, Sep>,
24    start: usize,
25    pattern_iters: Vec<Pattern::Iter<'parse>>,
26    sep_iters: Vec<Sep::Iter<'parse>>,
27}
28
29impl<Pattern, Sep> Parser for RepeatParser<Pattern, Sep>
30where
31    Pattern: Parser,
32    Sep: Parser,
33{
34    type Output = Vec<Pattern::Output>;
35    type RawOutput = (Vec<Pattern::Output>,);
36    type Iter<'parse> = RepeatParseIter<'parse, Pattern, Sep>
37    where
38        Pattern: 'parse,
39        Sep: 'parse;
40
41    fn parse_iter<'parse>(
42        &'parse self,
43        context: &mut ParseContext<'parse>,
44        start: usize,
45    ) -> Result<Self::Iter<'parse>, Reported> {
46        let mut iter = RepeatParseIter {
47            params: self,
48            start,
49            pattern_iters: vec![],
50            sep_iters: vec![],
51        };
52        iter.next(context, Mode::Advance)?;
53        Ok(iter)
54    }
55}
56
57impl<Pattern, Sep> RepeatParser<Pattern, Sep> {
58    fn check_repeat_count(&self, count: usize) -> bool {
59        let expected_parity = !self.sep_is_terminator as usize;
60        let nmatches = (count + expected_parity) / 2;
61        (count == 0 || count % 2 == expected_parity)
62            && self.min <= nmatches
63            && match self.max {
64                None => true,
65                Some(max) => nmatches <= max,
66            }
67    }
68}
69
70// Internal state of the next() method.
71enum Mode {
72    BacktrackTopIter,
73    Advance,
74    Exhausted,
75    YieldThenBacktrack,
76}
77
78impl<'parse, Pattern, Sep> RepeatParseIter<'parse, Pattern, Sep>
79where
80    Pattern: Parser,
81    Sep: Parser,
82{
83    fn num_matches(&self) -> usize {
84        self.pattern_iters.len() + self.sep_iters.len()
85    }
86
87    // True if we've matched as many separators as patterns, so pattern is next.
88    fn is_pattern_next(&self) -> bool {
89        self.pattern_iters.len() == self.sep_iters.len()
90    }
91
92    /// End position of what's been matched so far.
93    fn end(&self) -> usize {
94        if self.num_matches() == 0 {
95            self.start
96        } else if self.is_pattern_next() {
97            self.sep_iters.last().unwrap().match_end()
98        } else {
99            self.pattern_iters.last().unwrap().match_end()
100        }
101    }
102
103    /// Precondition: Either there are no iters or we just successfully
104    /// backtracked the foremost iter.
105    ///
106    /// This never returns success because we keep advancing until we fail to
107    /// match, then return the error without trying to backtrack.
108    fn advance(&mut self, context: &mut ParseContext<'parse>) -> Result<(), Reported> {
109        // TODO: When considering creating a new iterator, if we have already
110        // matched `max` times, don't bother; no matches can come of it.
111        loop {
112            assert_eq!(self.pattern_iters.len(), (self.num_matches() + 1) / 2);
113            assert_eq!(self.sep_iters.len(), self.num_matches() / 2);
114
115            if self.is_pattern_next() {
116                let start = self.end();
117                let iter = self.params.pattern.parse_iter(context, start)?;
118                self.pattern_iters.push(iter);
119            }
120
121            let start = self.end();
122            let iter = self.params.sep.parse_iter(context, start)?;
123            self.sep_iters.push(iter);
124        }
125    }
126
127    fn next(&mut self, context: &mut ParseContext<'parse>, mut mode: Mode) -> Result<(), Reported> {
128        loop {
129            match mode {
130                Mode::BacktrackTopIter => {
131                    // Need to call backtrack() on the top iter. If that
132                    // succeeds, advance again.
133                    assert_eq!(self.pattern_iters.len(), (self.num_matches() + 1) / 2);
134                    assert_eq!(self.sep_iters.len(), self.num_matches() / 2);
135
136                    if self.num_matches() == 0 {
137                        // No more iterators. We exhausted all possibilities.
138                        return Err(Reported);
139                    }
140                    let backtrack_result = if self.is_pattern_next() {
141                        self.sep_iters.last_mut().unwrap().backtrack(context)
142                    } else {
143                        self.pattern_iters.last_mut().unwrap().backtrack(context)
144                    };
145
146                    mode = match backtrack_result {
147                        // Got a match! But don't return it to the user yet.
148                        // Repeats are "greedy"; we press on to see if we can
149                        // match again! If we just matched `pattern`, try
150                        // `sep`; if we just matched `sep`, try `pattern`.
151                        Ok(()) => Mode::Advance,
152                        Err(Reported) => Mode::Exhausted,
153                    };
154                }
155                Mode::Advance => {
156                    // Scan forward, hoping to find matches and create new
157                    // iterators. (`let _ =` because advance always fails.)
158                    let _ = self.advance(context);
159                    mode = Mode::YieldThenBacktrack;
160                }
161                Mode::Exhausted => {
162                    // We just called backtrace() on the top iter, and it
163                    // failed. It's exhausted and needs to be discarded.
164                    assert_eq!(self.pattern_iters.len(), (self.num_matches() + 1) / 2);
165                    assert_eq!(self.sep_iters.len(), self.num_matches() / 2);
166
167                    if self.is_pattern_next() {
168                        self.sep_iters.pop();
169                    } else {
170                        self.pattern_iters.pop();
171                    }
172                    mode = Mode::YieldThenBacktrack;
173                }
174
175                Mode::YieldThenBacktrack => {
176                    // We just either popped an exhausted iterator, or failed
177                    // to create one. If the current status is an overall
178                    // match, yield that. Then transition to BacktrackTopIter
179                    // mode.
180                    //
181                    // (Repeats are "greedy", so we need to yield the longest match
182                    // first. This means returning only "on the way out", a
183                    // postorder walk of the tree of possible parses.)
184                    if self.params.check_repeat_count(self.num_matches()) {
185                        return Ok(());
186                    }
187                    mode = Mode::BacktrackTopIter;
188                }
189            }
190        }
191    }
192}
193
194impl<'parse, Pattern, Sep> ParseIter<'parse> for RepeatParseIter<'parse, Pattern, Sep>
195where
196    Pattern: Parser,
197    Sep: Parser,
198{
199    type RawOutput = (Vec<Pattern::Output>,);
200
201    fn match_end(&self) -> usize {
202        self.end()
203    }
204
205    fn backtrack(&mut self, context: &mut ParseContext<'parse>) -> Result<(), Reported> {
206        self.next(context, Mode::BacktrackTopIter)
207    }
208
209    fn convert(&self) -> (Vec<Pattern::Output>,) {
210        let v = self
211            .pattern_iters
212            .iter()
213            .map(|iter| iter.convert().into_user_type())
214            .collect();
215        (v,)
216    }
217}
218
219pub fn repeat<Pattern, Sep>(
220    pattern: Pattern,
221    sep: Sep,
222    min: usize,
223    max: Option<usize>,
224    sep_is_terminator: bool,
225) -> RepeatParser<Pattern, Sep> {
226    RepeatParser {
227        pattern,
228        min,
229        max,
230        sep,
231        sep_is_terminator,
232    }
233}
234
235/// Used by the `parser!()` macro to implement the `*` quantifier.
236#[doc(hidden)]
237pub fn star<Pattern>(pattern: Pattern) -> RepeatParser<Pattern, EmptyParser> {
238    repeat(pattern, empty(), 0, None, false)
239}
240
241/// Used by the `parser!()` macro to implement the `+` quantifier.
242#[doc(hidden)]
243pub fn plus<Pattern>(pattern: Pattern) -> RepeatParser<Pattern, EmptyParser> {
244    repeat(pattern, empty(), 1, None, false)
245}
246
247/// <code>repeat_sep(<var>pattern</var>, <var>separator</var>)</code> matches
248/// the given *pattern* any number of times, separated by the *separator*. For
249/// example, `parser!(repeat_sep(i32, ","))` matches a list of comma-separated
250/// integers.
251///
252/// This converts only the bits that match *pattern* to Rust values, producing
253/// a `Vec`. Any parts of the string matched by *separator* are not converted.
254pub fn repeat_sep<Pattern, Sep>(pattern: Pattern, sep: Sep) -> RepeatParser<Pattern, Sep> {
255    repeat(pattern, sep, 0, None, false)
256}
257
258#[cfg(test)]
259mod tests {
260    use super::*;
261    use crate::parsers::usize;
262    use crate::testing::*;
263
264    #[test]
265    fn test_repeat_basics() {
266        let p = star("a");
267        assert_parse_eq(p, "", vec![]);
268        assert_parse_eq(p, "a", vec![()]);
269        assert_parse_eq(p, "aa", vec![(), ()]);
270        assert_parse_eq(p, "aaa", vec![(), (), ()]);
271        assert_no_parse(p, "b");
272        assert_no_parse(p, "ab");
273        assert_no_parse(p, "ba");
274
275        let p = repeat_sep("cow", ",");
276        assert_parse_eq(p, "", vec![]);
277        assert_parse_eq(p, "cow", vec![()]);
278        assert_parse_eq(p, "cow,cow", vec![(), ()]);
279        assert_parse_eq(p, "cow,cow,cow", vec![(), (), ()]);
280        assert_no_parse(p, "cowcow");
281        assert_no_parse(p, "cow,");
282        assert_no_parse(p, "cow,,cow");
283        assert_no_parse(p, "cow,cow,");
284        assert_no_parse(p, ",");
285
286        let p = plus("a");
287        assert_no_parse(p, "");
288        assert_parse_eq(p, "a", vec![()]);
289        assert_parse_eq(p, "aa", vec![(), ()]);
290
291        let p = repeat_sep(usize, ",");
292        assert_parse_eq(p, "11417,0,0,334", vec![11417usize, 0, 0, 334]);
293    }
294}