aoc_parse/parsers/
lines.rs

//! Parsers that parse lines or groups of lines: `line(p)`, `lines(p)`,
//! `section(p)`, `sections(p)`.
2
3use std::marker::PhantomData;
4
5use crate::{
6    parsers::{star, EmptyParser, RepeatParser},
7    types::ParserOutput,
8    ParseContext, ParseError, ParseIter, Parser, Reported, Result,
9};
10
/// A kind of delimited region of the input that a pattern can be confined to.
///
/// This is implemented for `Line` and `Section`, the two region types.
pub trait Region: Copy + Clone {
    /// Checks that `start` is an offset within the context's source text
    /// where a region of this type may begin.
    ///
    /// Returns `Ok(())` if it is; otherwise returns `Err(Reported)`. The
    /// implementations in this module report an error to `context` before
    /// returning the `Err`.
    ///
    /// # Panics
    ///
    /// This can panic if `start` is not a character boundary in the source.
    fn check_at_start(context: &mut ParseContext, start: usize) -> Result<(), Reported>;

    /// Looks for the end of the region that begins at `start`.
    ///
    /// If a suitable end is found for this region (`'\n'` or `/\Z/` for a
    /// line, `/^\n/` or `/\Z/` for a section) then this returns a pair of
    ///
    /// -   the end of the interior of the region, for the purpose of parsing the
    ///     interior; and
    /// -   the end of the delimiter, for the purpose of reporting how much data
    ///     we consumed on a successful parse.
    ///
    /// If no suitable end is found, this returns `Err(Reported)`.
    fn find_end(context: &mut ParseContext, start: usize) -> Result<(usize, usize), Reported>;

    /// Reports an error to `context` indicating that we found a region and
    /// matched the text of the region to the expected subpattern, but the
    /// match doesn't cover the entire region. `end` is the offset where the
    /// partial match stopped.
    fn report_incomplete_match(context: &mut ParseContext, end: usize) -> Reported;
}
35
36/// A line is a sequence of zero or more non-newline characters, starting
37/// either at the beginning of the input or immediately after a newline;
38/// followed by a single newline.
39#[derive(Debug, Clone, Copy)]
40pub struct Line;
41
42impl Region for Line {
43    fn check_at_start(context: &mut ParseContext, start: usize) -> Result<(), Reported> {
44        let source = context.source();
45        if start == 0 || source[..start].ends_with('\n') {
46            Ok(())
47        } else {
48            Err(context.report(ParseError::new_bad_line_start(source, start)))
49        }
50    }
51
52    fn find_end(context: &mut ParseContext, start: usize) -> Result<(usize, usize), Reported> {
53        let source = context.source();
54        match source[start..].find('\n') {
55            Some(offset) => Ok((start + offset, start + offset + 1)),
56            None if start != source.len() => Ok((source.len(), source.len())),
57            None => Err(context.error_expected(source.len(), "line")),
58        }
59    }
60
61    fn report_incomplete_match(context: &mut ParseContext, end: usize) -> Reported {
62        context.report(ParseError::new_line_extra(context.source(), end))
63    }
64}
65
66/// A "section" is a sequence of zero or more nonblank lines, starting either
67/// at the beginning of the input or immediately after a newline; followed by
68/// either a blank line or the end of input.
69#[derive(Debug, Clone, Copy)]
70pub struct Section;
71
72impl Region for Section {
73    fn check_at_start(context: &mut ParseContext, start: usize) -> Result<(), Reported> {
74        let source = context.source();
75        if start == 0 || &source[..start] == "\n" || source[..start].ends_with("\n\n") {
76            Ok(())
77        } else {
78            Err(context.report(ParseError::new_bad_section_start(source, start)))
79        }
80    }
81
82    fn find_end(context: &mut ParseContext, start: usize) -> Result<(usize, usize), Reported> {
83        // FIXME BUG: unclear what this should do when looking at an empty
84        // section at end of input. presumably not repeat forever. (why does
85        // this not always hang forever if you try to use `sections`?)
86        let source = context.source();
87        match source[start..].find("\n\n") {
88            // ending at a blank line
89            Some(index) => Ok((start + index + 1, start + index + 2)),
90            // ending at the end of `source`
91            None if start < source.len() => Ok((source.len(), source.len())),
92            // no end-of-section delimiter found
93            None => Err(context.error_expected(source.len(), "section")),
94        }
95    }
96
97    fn report_incomplete_match(context: &mut ParseContext, end: usize) -> Reported {
98        context.report(ParseError::new_section_extra(context.source(), end))
99    }
100}
101
102/// Match but don't convert; just return the ParseIter on success. Expects all
103/// of `source` to be matched, otherwise it's an error.
104fn match_fully<'parse, R, P>(
105    context: &mut ParseContext<'parse>,
106    parser: &'parse P,
107) -> Result<P::Iter<'parse>, Reported>
108where
109    R: Region,
110    P: Parser,
111{
112    let source = context.source();
113    let mut iter = parser.parse_iter(context, 0)?;
114    while iter.match_end() != source.len() {
115        R::report_incomplete_match(context, iter.match_end());
116        iter.backtrack(context)?;
117    }
118    Ok(iter)
119}
120
121#[derive(Copy, Clone)]
122pub struct RegionParser<R: Region, P> {
123    parser: P,
124    phantom: PhantomData<fn() -> R>,
125}
126
127impl<R, P> Parser for RegionParser<R, P>
128where
129    R: Region,
130    P: Parser,
131{
132    type RawOutput = (P::Output,);
133    type Output = P::Output;
134    type Iter<'parse> = RegionParseIter<'parse, P>
135    where
136        R: 'parse,
137        P: 'parse;
138
139    fn parse_iter<'parse>(
140        &'parse self,
141        context: &mut ParseContext<'parse>,
142        start: usize,
143    ) -> Result<Self::Iter<'parse>, Reported> {
144        R::check_at_start(context, start)?;
145        let (inner_end, outer_end) = R::find_end(context, start)?;
146
147        let iter = context.with_slice(start, inner_end, |inner_context| {
148            match_fully::<R, P>(inner_context, &self.parser)
149        })?;
150        Ok(RegionParseIter { iter, outer_end })
151    }
152}
153
154pub struct RegionParseIter<'parse, P>
155where
156    P: Parser + 'parse,
157{
158    iter: P::Iter<'parse>,
159    outer_end: usize,
160}
161
162impl<'parse, P> ParseIter<'parse> for RegionParseIter<'parse, P>
163where
164    P: Parser,
165{
166    type RawOutput = (P::Output,);
167
168    fn match_end(&self) -> usize {
169        self.outer_end
170    }
171
172    fn backtrack(&mut self, _context: &mut ParseContext<'parse>) -> Result<(), Reported> {
173        Err(Reported)
174    }
175
176    fn convert(&self) -> Self::RawOutput {
177        let v = self.iter.convert().into_user_type();
178        (v,)
179    }
180}
181
/// A [`RegionParser`] whose region type is [`Line`]; the type returned by
/// [`line`].
pub type LineParser<P> = RegionParser<Line, P>;
/// A [`RegionParser`] whose region type is [`Section`]; the type returned by
/// [`section`].
pub type SectionParser<P> = RegionParser<Section, P>;
184
185/// <code>line(<var>pattern</var>)</code> matches a single line of text that
186/// matches *pattern*, and the newline at the end of the line.
187///
188/// This is like <code>^<var>pattern</var>\n</code> in regular expressions,
189/// except <code>line(<var>pattern</var>)</code> will only ever match exactly
190/// one line of text, even if *pattern* could match more newlines.
191///
192/// `line(string(any_char+))` matches a line of text, strips off the newline
193/// character, and returns the rest as a `String`.
194///
195/// `line("")` matches a blank line.
196pub fn line<P>(parser: P) -> LineParser<P> {
197    LineParser {
198        parser,
199        phantom: PhantomData,
200    }
201}
202
203/// <code>lines(<var>pattern</var>)</code> matches any number of lines of text
204/// matching *pattern*. Each line must be terminated by a newline, `'\n'`.
205///
206/// Equivalent to <code>line(<var>pattern</var>)*</code>.
207///
208/// ```
209/// # use aoc_parse::{parser, prelude::*};
210/// let p = parser!(lines(repeat_sep(digit, " ")));
211/// assert_eq!(
212///     p.parse("1 2 3\n4 5 6\n").unwrap(),
213///     vec![vec![1, 2, 3], vec![4, 5, 6]],
214/// );
215/// ```
216pub fn lines<P>(parser: P) -> RepeatParser<LineParser<P>, EmptyParser> {
217    star(line(parser))
218}
219
220/// <code>section(<var>pattern</var>)</code> matches zero or more nonblank
221/// lines, followed by either a blank line or the end of input. The nonblank
222/// lines must match *pattern*.
223///
224/// `section()` consumes the blank line. *pattern* should not expect to see it.
225///
226/// It's common for an AoC puzzle input to have several lines of data, then a
227/// blank line, and then a different kind of data. You can parse this with
228/// <code>section(<var>p1</var>) section(<var>p2</var>)</code>.
229///
230/// `section(lines(u64))` matches a section that's a list of numbers, one per
231/// line.
232pub fn section<P>(parser: P) -> SectionParser<P> {
233    SectionParser {
234        parser,
235        phantom: PhantomData,
236    }
237}
238
239/// <code>sections(<var>pattern</var>)</code> matches any number of sections
240/// matching *pattern*. Equivalent to
241/// <code>section(<var>pattern</var>)*</code>.
242pub fn sections<P>(parser: P) -> RepeatParser<SectionParser<P>, EmptyParser> {
243    star(section(parser))
244}
245
#[cfg(test)]
mod tests {
    use super::{line, section};
    use crate::prelude::u32;
    use crate::testing::*;

    /// Exercises how `line` and `section` treat trailing newlines, blank
    /// lines, and the end of input.
    #[test]
    fn test_newline_handling() {
        // A single line: the trailing newline is optional, but an extra
        // blank line after it is not part of the match.
        let p = line("hello world");
        for input in ["hello world\n", "hello world"] {
            assert_parse_eq(p, input, ());
        }
        assert_no_parse(p, "hello world\n\n");

        // Two consecutive lines: both must be present, and they must be
        // separated by a newline.
        let p = sequence(line("dog"), line("cat"));
        for input in ["dog\n", "dogcat", "dogcat\n"] {
            assert_no_parse(p, input);
        }
        for input in ["dog\ncat", "dog\ncat\n"] {
            assert_parse_eq(p, input, ((), ()));
        }

        // A single section of numbers: at most one blank line may follow.
        let p = section(plus(line(u32)));
        assert_no_parse(p, "15\n16\n\n\n");
        for input in ["15\n16\n\n", "15\n16\n", "15\n16"] {
            assert_parse_eq(p, input, vec![15, 16]);
        }

        // Two consecutive sections: a blank line must separate them.
        let p = sequence(section(line("sec1")), section(line("sec2")));
        for input in ["sec1\n\nsec2\n\n", "sec1\n\nsec2\n", "sec1\n\nsec2"] {
            assert_parse_eq(p, input, ((), ()));
        }
        for input in [
            "sec1\nsec2\n\n",
            "sec1\nsec2\n",
            "sec1\nsec2",
            "sec1sec2\n\n",
        ] {
            assert_no_parse(p, input);
        }
    }
}