nom_supreme/
multi.rs

1/*!
2Perfected looping parsers designed to behave more reliably and provide more
3useful parse errors.
4
5The combinators in this module all generally follow the same pattern for
6parsing in a loop. They parse an item; then, they attempt to parse a
7`terminator`. If the `terminator` is found, the parse returns successfully;
8otherwise, they attempt to parse a `separator`. If they fail to parse either
9a `separator` or a `terminator`, the parse fails; otherwise, it will continue
10on to parse the next item. The parsed items are collected together into a final
11value; each combinator does this in a slightly different way:
12
13- [`collect_separated_terminated`] collects the parsed items into a collection
14with [`Extend`].
15- [`parse_separated_terminated`] combines the parsed items with a folding
16function.
17- [`parse_separated_terminated_res`] combines the parsed items with a fallible
18folding function; it may return early if the folding function returns an
19[`Err`].
20
21These combinators always parse at least 1 item. If you want 0 or more things
22to be parsed, use [`opt`] or [`alt`] to handle that case.
23
24These combinators will stop as soon as they find a `terminator`. If you wish
25to have a `terminator` parser that is the same as your `separator`, you'll need
26to add some extra context to the terminator parser; perhaps a lookahead
27with [`peek`].
28
29These combinators exists to provide meaningful parse errors. By requiring a
30`terminator`, we can ensure that they don't suffer from the normal folding
31parser problem of unconditionally returning success because a subparser failure
32is interpreted as the end of the loop. This ensures that potentially important
33errors aren't thrown away.
34
35The combinators will attempt to smartly allow 0-length matches. It will allow
36subparsers to have 0-length matches, but if a full loop is made without any
37progress being made, we assume we've encountered an infinite loop and return
38a parse error.
39
40[`opt`]: crate::parser_ext::ParserExt::opt
41[`alt`]: nom::branch::alt
42[`peek`]: crate::parser_ext::ParserExt::peek
43*/
44
45use core::{convert::Infallible, iter};
46
47use nom::{
48    error::{ErrorKind::SeparatedNonEmptyList, FromExternalError, ParseError},
49    Err::Error,
50    InputLength, Parser,
51};
52
53/**
54Parse a series of 1 or more things, separated by `separator`, terminated by
55`terminator`, and collect them into a collection using [`Extend`].
56
57When this parser is run, it will create a new, empty collection with
58[`Default`]. It will then collect each parsed item into the collection with
59[`Extend`]. See the [module] docs for details of how this parser parses a sequence.
60
61See the [module] docs for a detailed description of how this parser parses a sequence.
62
63# Example
64
65```
66use nom_supreme::{
67    multi::collect_separated_terminated,
68    parser_ext::ParserExt,
69    error::ErrorTree,
70};
71use nom::character::complete::{digit1, char, space0};
72use nom::{IResult, Parser, error::ParseError};
73
74fn parse_number(input: &str) -> IResult<&str, i32, ErrorTree<&str>> {
75    digit1
76        .preceded_by(char('-').opt())
77        .recognize()
78        .parse_from_str()
79        .parse(input)
80}
81
82// A vector is a square brackets, containing comma separated numbers, with
83// whitespace in between
84let mut vec_parser = collect_separated_terminated(
85    // Parse numbers
86    parse_number.terminated(space0),
87
88    // separated by commas
89    char(',').terminated(space0),
90
91    // terminated by a close bracket
92    char(']'),
93)
94// Allow for empty vectors
95.or(char(']').value(Vec::new()))
96.preceded_by(char('[').terminated(space0));
97
98let result: IResult<&str, Vec<i32>, ErrorTree<&str>> = vec_parser.parse("[1, 2, -3, 4]");
99let vec = result.unwrap().1;
100assert_eq!(vec, [1, 2, -3, 4]);
101
102```
103
104[module]: crate::multi
105*/
106pub fn collect_separated_terminated<
107    Input,
108    ParseOutput,
109    SepOutput,
110    TermOutput,
111    ParseErr,
112    Collection,
113>(
114    parser: impl Parser<Input, ParseOutput, ParseErr>,
115    separator: impl Parser<Input, SepOutput, ParseErr>,
116    terminator: impl Parser<Input, TermOutput, ParseErr>,
117) -> impl Parser<Input, Collection, ParseErr>
118where
119    Input: Clone + InputLength,
120    ParseErr: ParseError<Input>,
121    Collection: Default + Extend<ParseOutput>,
122{
123    parse_separated_terminated(
124        parser,
125        separator,
126        terminator,
127        Collection::default,
128        // TODO: use extend_one
129        |collection, item| express!(collection.extend(iter::once(item))),
130    )
131}
132
133/**
134Parse a series of 1 or more things, separated by `separator`, terminated by
135`terminator`, and fold them together using a folding function.
136
137When this parser is run, it will first create an accumulator value with `init`.
138It will then combine it with every parsed item using `fold`, which should
139return the new accumulator for each item. See the [module] docs for details
140of how this parser parses a sequence.
141
142[module]: crate::multi
143*/
144#[inline]
145pub fn parse_separated_terminated<Input, ParseOutput, SepOutput, TermOutput, ParseErr, Accum>(
146    parser: impl Parser<Input, ParseOutput, ParseErr>,
147    separator: impl Parser<Input, SepOutput, ParseErr>,
148    terminator: impl Parser<Input, TermOutput, ParseErr>,
149
150    init: impl FnMut() -> Accum,
151    mut fold: impl FnMut(Accum, ParseOutput) -> Accum,
152) -> impl Parser<Input, Accum, ParseErr>
153where
154    Input: Clone + InputLength,
155    ParseErr: ParseError<Input>,
156{
157    parse_separated_terminated_impl(
158        parser,
159        separator,
160        terminator,
161        init,
162        move |accum, item| Ok(fold(accum, item)),
163        |_input, err: Infallible| match err {},
164    )
165}
166
167/**
168Parse a series of 1 or more things, separated by some `separator`, terminated
169by some `terminator`, folding them all together with a fallible fold function.
170
171This function is identical to [`parse_separated_terminated`], except that
172the fold function may return an error, which ends the parse early. See its
173documentation for more details about the precise behavior of this parser.
174*/
175#[inline]
176pub fn parse_separated_terminated_res<
177    Input,
178    ParseOutput,
179    SepOutput,
180    TermOutput,
181    ParseErr,
182    Accum,
183    FoldErr,
184>(
185    parser: impl Parser<Input, ParseOutput, ParseErr>,
186    separator: impl Parser<Input, SepOutput, ParseErr>,
187    terminator: impl Parser<Input, TermOutput, ParseErr>,
188
189    init: impl FnMut() -> Accum,
190    fold: impl FnMut(Accum, ParseOutput) -> Result<Accum, FoldErr>,
191) -> impl Parser<Input, Accum, ParseErr>
192where
193    Input: Clone + InputLength,
194    ParseErr: ParseError<Input> + FromExternalError<Input, FoldErr>,
195{
196    parse_separated_terminated_impl(parser, separator, terminator, init, fold, |input, err| {
197        ParseErr::from_external_error(input, SeparatedNonEmptyList, err)
198    })
199}
200
201/// Helper enum for tracking zero length parses. `parse_separated_terminated`
202/// allows for subparsers to return zero-length matches, but if *every*
203/// subparser does so in a loop, that's reported as an error.
204///
205/// This enum specifically tracks the least-recent zero-length parse that has
206/// not been succeeded by a non-zero-length parser.
207#[derive(Debug, Clone, Copy, PartialEq, Eq)]
208enum ZeroLengthParseState<E> {
209    None,
210    Item,
211    Separator { terminator_error: E },
212}
213
214impl<E> ZeroLengthParseState<E> {
215    fn terminator_error(self) -> Option<E> {
216        match self {
217            Self::Separator { terminator_error } => Some(terminator_error),
218            _ => None,
219        }
220    }
221}
222
223/// Shared implementation for parse_separated_terminated_res and
224/// parse_separated_terminated. This exists so that we don't have to have an
225/// unnecessary bound of FromExternalError on parse_separated_terminated.
226#[inline]
227fn parse_separated_terminated_impl<
228    Input,
229    ParseOutput,
230    SepOutput,
231    TermOutput,
232    ParseErr,
233    Accum,
234    FoldErr,
235>(
236    mut parser: impl Parser<Input, ParseOutput, ParseErr>,
237    mut separator: impl Parser<Input, SepOutput, ParseErr>,
238    mut terminator: impl Parser<Input, TermOutput, ParseErr>,
239
240    mut init: impl FnMut() -> Accum,
241    mut fold: impl FnMut(Accum, ParseOutput) -> Result<Accum, FoldErr>,
242
243    mut build_error: impl FnMut(Input, FoldErr) -> ParseErr,
244) -> impl Parser<Input, Accum, ParseErr>
245where
246    Input: Clone + InputLength,
247    ParseErr: ParseError<Input>,
248{
249    move |mut input: Input| {
250        let mut accum = init();
251
252        let mut zero_length_state = ZeroLengthParseState::None;
253
254        loop {
255            // Try to find a value. To fail to do so at this point is an
256            // error, since we either just started or successfully parsed a
257            // separator.
258            //
259            // If an error occurs here, also try to attach terminator_error.
260            // terminator_error is available if the most recent separator parse
261            // was zero-length, which means that both the terminator and the
262            // item would be valid parses at this point.
263            let (tail, value) = match parser.parse(input.clone()) {
264                Ok(success) => success,
265                Err(Error(item_error)) => {
266                    break Err(Error(match zero_length_state.terminator_error() {
267                        None => item_error,
268                        Some(terminator_error) => item_error.or(terminator_error),
269                    }))
270                }
271                Err(err) => break Err(err),
272            };
273
274            // Check zero-length matches
275            zero_length_state = match (input.input_len() == tail.input_len(), zero_length_state) {
276                // If both the item and the separator had a zero length match,
277                // we're hanging. Bail.
278                //
279                // It doesn't make sense to include the terminator error here,
280                // because we *did* successfully parse a separator and an
281                // item, they just happened to be zero length
282                (true, ZeroLengthParseState::Separator { .. }) => {
283                    break Err(Error(ParseErr::from_error_kind(
284                        input,
285                        SeparatedNonEmptyList,
286                    )))
287                }
288
289                // If only the item had a zero-length match, update the
290                // state.
291                (true, _) => ZeroLengthParseState::Item,
292
293                // If the item had a non-zero length match, clear the state
294                (false, _) => ZeroLengthParseState::None,
295            };
296
297            // Advance the loop state
298            accum = fold(accum, value).map_err(|err| Error(build_error(input, err)))?;
299            input = tail;
300
301            // Try to find a terminator; if we found it, we're done. If we
302            // didn't, preserve the error, so that it can be reported as an
303            // .or() branch with the subsequent separator or item error.
304            let terminator_error = match terminator.parse(input.clone()) {
305                // We found a terminator, so we're done
306                Ok((tail, _)) => break Ok((tail, accum)),
307
308                // No terminator. Keep track of the error in case we also fail
309                // to find a separator or item.
310                Err(Error(err)) => err,
311
312                // Other kinds of errors should be returned immediately.
313                Err(err) => break Err(err),
314            };
315
316            // No terminator, so instead try to find a separator
317            let tail = match separator.parse(input.clone()) {
318                Ok((tail, _)) => tail,
319                Err(Error(separator_error)) => {
320                    break Err(Error(separator_error.or(terminator_error)))
321                }
322                Err(err) => break Err(err),
323            };
324
325            // Check zero-length matches
326            zero_length_state = match (input.input_len() == tail.input_len(), zero_length_state) {
327                // If both the separator and the item had a zero length match,
328                // we're hanging. Bail.
329                (true, ZeroLengthParseState::Item) => {
330                    break Err(Error(ParseErr::from_error_kind(
331                        input,
332                        SeparatedNonEmptyList,
333                    )))
334                }
335                // If only the separator had a zero-length match, update the
336                // state. Additionally preserve the terminator error so that
337                // it can be reported as an alternate if there was an item
338                // error.
339                (true, _) => ZeroLengthParseState::Separator { terminator_error },
340
341                // If the separator had a non-zero length match, clear the
342                // state
343                (false, _) => ZeroLengthParseState::None,
344            };
345
346            // Advance the loop state
347            input = tail;
348        }
349    }
350}
351
352#[cfg(test)]
353mod test_separated_terminated {
354    use cool_asserts::assert_matches;
355    use nom::{
356        branch::alt,
357        character::complete::{alpha0, char, digit1, space0},
358        error::ErrorKind,
359        Err, IResult, Parser,
360    };
361
362    use crate::error::{BaseErrorKind, ErrorTree, Expectation};
363    use crate::parser_ext::ParserExt;
364
365    use super::parse_separated_terminated;
366
367    /// Parse a series of numbers, separated by commas, terminated by a period.
368    fn parse_number_list(input: &str) -> IResult<&str, Vec<i64>, ErrorTree<&str>> {
369        parse_separated_terminated(
370            digit1.parse_from_str(),
371            char(',').delimited_by(space0),
372            char('.').preceded_by(space0),
373            Vec::new,
374            |vec, num| express!(vec.push(num)),
375        )
376        .parse(input)
377    }
378
379    #[test]
380    fn basic() {
381        assert_eq!(
382            parse_number_list("1, 2, 3, 4, 5.").unwrap(),
383            ("", vec![1, 2, 3, 4, 5]),
384        )
385    }
386
387    #[test]
388    fn trailing_input() {
389        assert_eq!(
390            parse_number_list("1, 2, 3, 4, 5. 4, 5, 6.").unwrap(),
391            (" 4, 5, 6.", vec![1, 2, 3, 4, 5]),
392        )
393    }
394
395    #[test]
396    fn only_one() {
397        assert_eq!(parse_number_list("10.").unwrap(), ("", vec![10]),)
398    }
399
400    #[test]
401    fn at_least_one() {
402        let err = parse_number_list("abc").unwrap_err();
403
404        assert_matches!(
405            err,
406            Err::Error(ErrorTree::Base {
407                location: "abc",
408                kind: BaseErrorKind::Expected(Expectation::Digit)
409            })
410        );
411    }
412
413    /// Test that a parse failure from both the separator and the terminator
414    /// causes an error including both messages
415    #[test]
416    fn terminator_separator_miss() {
417        let err = parse_number_list("10, 20 30.").unwrap_err();
418
419        let choices = assert_matches!(err, Err::Error(ErrorTree::Alt(choices)) => choices);
420        assert_matches!(
421            choices.as_slice(),
422            [
423                ErrorTree::Base {
424                    location: "30.",
425                    kind: BaseErrorKind::Expected(Expectation::Char(','))
426                },
427                ErrorTree::Base {
428                    location: "30.",
429                    kind: BaseErrorKind::Expected(Expectation::Char('.'))
430                },
431            ]
432        );
433    }
434
435    /// Test that a terminator is required, even at EoF
436    #[test]
437    fn required_terminator() {
438        let err = parse_number_list("1, 2, 3").unwrap_err();
439
440        let choices = assert_matches!(err, Err::Error(ErrorTree::Alt(choices)) => choices);
441        assert_matches!(
442            choices.as_slice(),
443            [
444                ErrorTree::Base {
445                    location: "",
446                    kind: BaseErrorKind::Expected(Expectation::Char(','))
447                },
448                ErrorTree::Base {
449                    location: "",
450                    kind: BaseErrorKind::Expected(Expectation::Char('.'))
451                },
452            ]
453        );
454    }
455
456    /// Test that a parse failure from the item parser includes only that error
457    /// if the separator isn't zero-length
458    #[test]
459    fn item_error() {
460        let err = parse_number_list("1, 2, abc.").unwrap_err();
461
462        assert_matches!(
463            err,
464            Err::Error(ErrorTree::Base {
465                location: "abc.",
466                kind: BaseErrorKind::Expected(Expectation::Digit),
467            })
468        );
469    }
470
471    /// Parse a series of numbers ending in periods, separated by 0 or more
472    /// whitespace, terminated by a semicolon. Used to test 0-length
473    /// separator behavior.
474    fn parse_number_dot_list(input: &str) -> IResult<&str, Vec<i64>, ErrorTree<&str>> {
475        parse_separated_terminated(
476            digit1.terminated(char('.')).parse_from_str(),
477            space0,
478            char(';'),
479            Vec::new,
480            |vec, num| express!(vec.push(num)),
481        )
482        .parse(input)
483    }
484
485    #[test]
486    fn zero_length_separator() {
487        assert_eq!(
488            parse_number_dot_list("1.2. 3.4.  5.; abc").unwrap(),
489            (" abc", vec![1, 2, 3, 4, 5])
490        );
491    }
492
493    /// Test that, when a separator matches zero length, and then the item
494    /// parser fails, the returned error includes both the item error and the
495    /// terminator error.
496    #[test]
497    fn zero_length_separator_item_term_error() {
498        let err = parse_number_dot_list("1.2.3.abc.;").unwrap_err();
499
500        let choices = assert_matches!(err, Err::Error(ErrorTree::Alt(choices)) => choices);
501        assert_matches!(
502            choices.as_slice(),
503            [
504                ErrorTree::Base {
505                    location: "abc.;",
506                    kind: BaseErrorKind::Expected(Expectation::Digit)
507                },
508                ErrorTree::Base {
509                    location: "abc.;",
510                    kind: BaseErrorKind::Expected(Expectation::Char(';'))
511                },
512            ]
513        );
514    }
515
516    /// Parse a series of runs of 1 or more digits or 0 more more letters, separated by
517    /// an optional dash, terminated by a semicolon. Used to test
518    /// infinite loop detection
519    fn parse_letters_numbers(input: &str) -> IResult<&str, Vec<&str>, ErrorTree<&str>> {
520        parse_separated_terminated(
521            alt((digit1, alpha0)),
522            char('-').opt(),
523            char(';'),
524            Vec::new,
525            |vec, num| express!(vec.push(num)),
526        )
527        .parse(input)
528    }
529
530    #[test]
531    fn zero_length_item() {
532        assert_eq!(
533            parse_letters_numbers("----; abc").unwrap(),
534            (" abc", vec!["", "", "", "", ""])
535        )
536    }
537
538    #[test]
539    fn zero_length_separators() {
540        assert_eq!(
541            parse_letters_numbers("abc123abc123; abc").unwrap(),
542            (" abc", vec!["abc", "123", "abc", "123"]),
543        )
544    }
545
546    /// Test that both zero-length separators and items are allowed together,
547    /// as long as the loop makes progress
548    #[test]
549    fn zero_length_mixed() {
550        assert_eq!(
551            parse_letters_numbers("abc--123abc-123-; abc").unwrap(),
552            (" abc", vec!["abc", "", "123", "abc", "123", ""]),
553        )
554    }
555
556    /// Test that if the loop makes no progress, that's an error
557    #[test]
558    fn infinite_loop_aborts() {
559        let err = parse_letters_numbers("abc123-.; abc").unwrap_err();
560
561        assert_matches!(
562            err,
563            Err::Error(ErrorTree::Base {
564                location: ".; abc",
565                kind: BaseErrorKind::Kind(ErrorKind::SeparatedNonEmptyList)
566            })
567        );
568    }
569
570    /// Parse a series of numbers, separated by commas, terminated by optional
571    /// comma and eof. Used to test that the terminator "wins" when it and the
572    /// separator can match the same string.
573    fn parse_comma_separated(input: &str) -> IResult<&str, Vec<i64>, ErrorTree<&str>> {
574        parse_separated_terminated(
575            digit1.parse_from_str(),
576            char(','),
577            char(',').opt().all_consuming(),
578            Vec::new,
579            |vec, num| express!(vec.push(num)),
580        )
581        .parse(input)
582    }
583
584    #[test]
585    fn empty_terminator_wins() {
586        assert_eq!(
587            parse_comma_separated("1,2,3,4").unwrap(),
588            ("", vec![1, 2, 3, 4]),
589        );
590    }
591
592    #[test]
593    fn test_terminator_wins() {
594        assert_eq!(
595            parse_comma_separated("1,2,3,4,").unwrap(),
596            ("", vec![1, 2, 3, 4]),
597        )
598    }
599}
nom_supreme/multi.rs

nom_supreme/
multi.rs