nom_supreme/multi.rs
1/*!
2Perfected looping parsers designed to behave more reliably and provide more
3useful parse errors.
4
5The combinators in this module all generally follow the same pattern for
6parsing in a loop. They parse an item; then, they attempt to parse a
7`terminator`. If the `terminator` is found, the parse returns successfully;
8otherwise, they attempt to parse a `separator`. If they fail to parse either
9a `separator` or a `terminator`, the parse fails; otherwise, it will continue
10on to parse the next item. The parsed items are collected together into a final
11value; each combinator does this in a slightly different way:
12
13- [`collect_separated_terminated`] collects the parsed items into a collection
14with [`Extend`].
15- [`parse_separated_terminated`] combines the parsed items with a folding
16function.
17- [`parse_separated_terminated_res`] combines the parsed items with a fallible
18folding function; it may return early if the folding function returns an
19[`Err`].
20
21These combinators always parse at least 1 item. If you want 0 or more things
22to be parsed, use [`opt`] or [`alt`] to handle that case.
23
24These combinators will stop as soon as they find a `terminator`. If you wish
25to have a `terminator` parser that is the same as your `separator`, you'll need
26to add some extra context to the terminator parser; perhaps a lookahead
27with [`peek`].
28
29These combinators exists to provide meaningful parse errors. By requiring a
30`terminator`, we can ensure that they don't suffer from the normal folding
31parser problem of unconditionally returning success because a subparser failure
32is interpreted as the end of the loop. This ensures that potentially important
33errors aren't thrown away.
34
35The combinators will attempt to smartly allow 0-length matches. It will allow
36subparsers to have 0-length matches, but if a full loop is made without any
37progress being made, we assume we've encountered an infinite loop and return
38a parse error.
39
40[`opt`]: crate::parser_ext::ParserExt::opt
41[`alt`]: nom::branch::alt
42[`peek`]: crate::parser_ext::ParserExt::peek
43*/
44
45use core::{convert::Infallible, iter};
46
47use nom::{
48 error::{ErrorKind::SeparatedNonEmptyList, FromExternalError, ParseError},
49 Err::Error,
50 InputLength, Parser,
51};
52
53/**
54Parse a series of 1 or more things, separated by `separator`, terminated by
55`terminator`, and collect them into a collection using [`Extend`].
56
57When this parser is run, it will create a new, empty collection with
58[`Default`]. It will then collect each parsed item into the collection with
59[`Extend`]. See the [module] docs for details of how this parser parses a sequence.
60
61See the [module] docs for a detailed description of how this parser parses a sequence.
62
63# Example
64
65```
66use nom_supreme::{
67 multi::collect_separated_terminated,
68 parser_ext::ParserExt,
69 error::ErrorTree,
70};
71use nom::character::complete::{digit1, char, space0};
72use nom::{IResult, Parser, error::ParseError};
73
74fn parse_number(input: &str) -> IResult<&str, i32, ErrorTree<&str>> {
75 digit1
76 .preceded_by(char('-').opt())
77 .recognize()
78 .parse_from_str()
79 .parse(input)
80}
81
82// A vector is a square brackets, containing comma separated numbers, with
83// whitespace in between
84let mut vec_parser = collect_separated_terminated(
85 // Parse numbers
86 parse_number.terminated(space0),
87
88 // separated by commas
89 char(',').terminated(space0),
90
91 // terminated by a close bracket
92 char(']'),
93)
94// Allow for empty vectors
95.or(char(']').value(Vec::new()))
96.preceded_by(char('[').terminated(space0));
97
98let result: IResult<&str, Vec<i32>, ErrorTree<&str>> = vec_parser.parse("[1, 2, -3, 4]");
99let vec = result.unwrap().1;
100assert_eq!(vec, [1, 2, -3, 4]);
101
102```
103
104[module]: crate::multi
105*/
106pub fn collect_separated_terminated<
107 Input,
108 ParseOutput,
109 SepOutput,
110 TermOutput,
111 ParseErr,
112 Collection,
113>(
114 parser: impl Parser<Input, ParseOutput, ParseErr>,
115 separator: impl Parser<Input, SepOutput, ParseErr>,
116 terminator: impl Parser<Input, TermOutput, ParseErr>,
117) -> impl Parser<Input, Collection, ParseErr>
118where
119 Input: Clone + InputLength,
120 ParseErr: ParseError<Input>,
121 Collection: Default + Extend<ParseOutput>,
122{
123 parse_separated_terminated(
124 parser,
125 separator,
126 terminator,
127 Collection::default,
128 // TODO: use extend_one
129 |collection, item| express!(collection.extend(iter::once(item))),
130 )
131}
132
133/**
134Parse a series of 1 or more things, separated by `separator`, terminated by
135`terminator`, and fold them together using a folding function.
136
137When this parser is run, it will first create an accumulator value with `init`.
138It will then combine it with every parsed item using `fold`, which should
139return the new accumulator for each item. See the [module] docs for details
140of how this parser parses a sequence.
141
142[module]: crate::multi
143*/
144#[inline]
145pub fn parse_separated_terminated<Input, ParseOutput, SepOutput, TermOutput, ParseErr, Accum>(
146 parser: impl Parser<Input, ParseOutput, ParseErr>,
147 separator: impl Parser<Input, SepOutput, ParseErr>,
148 terminator: impl Parser<Input, TermOutput, ParseErr>,
149
150 init: impl FnMut() -> Accum,
151 mut fold: impl FnMut(Accum, ParseOutput) -> Accum,
152) -> impl Parser<Input, Accum, ParseErr>
153where
154 Input: Clone + InputLength,
155 ParseErr: ParseError<Input>,
156{
157 parse_separated_terminated_impl(
158 parser,
159 separator,
160 terminator,
161 init,
162 move |accum, item| Ok(fold(accum, item)),
163 |_input, err: Infallible| match err {},
164 )
165}
166
167/**
168Parse a series of 1 or more things, separated by some `separator`, terminated
169by some `terminator`, folding them all together with a fallible fold function.
170
171This function is identical to [`parse_separated_terminated`], except that
172the fold function may return an error, which ends the parse early. See its
173documentation for more details about the precise behavior of this parser.
174*/
175#[inline]
176pub fn parse_separated_terminated_res<
177 Input,
178 ParseOutput,
179 SepOutput,
180 TermOutput,
181 ParseErr,
182 Accum,
183 FoldErr,
184>(
185 parser: impl Parser<Input, ParseOutput, ParseErr>,
186 separator: impl Parser<Input, SepOutput, ParseErr>,
187 terminator: impl Parser<Input, TermOutput, ParseErr>,
188
189 init: impl FnMut() -> Accum,
190 fold: impl FnMut(Accum, ParseOutput) -> Result<Accum, FoldErr>,
191) -> impl Parser<Input, Accum, ParseErr>
192where
193 Input: Clone + InputLength,
194 ParseErr: ParseError<Input> + FromExternalError<Input, FoldErr>,
195{
196 parse_separated_terminated_impl(parser, separator, terminator, init, fold, |input, err| {
197 ParseErr::from_external_error(input, SeparatedNonEmptyList, err)
198 })
199}
200
201/// Helper enum for tracking zero length parses. `parse_separated_terminated`
202/// allows for subparsers to return zero-length matches, but if *every*
203/// subparser does so in a loop, that's reported as an error.
204///
205/// This enum specifically tracks the least-recent zero-length parse that has
206/// not been succeeded by a non-zero-length parser.
207#[derive(Debug, Clone, Copy, PartialEq, Eq)]
208enum ZeroLengthParseState<E> {
209 None,
210 Item,
211 Separator { terminator_error: E },
212}
213
214impl<E> ZeroLengthParseState<E> {
215 fn terminator_error(self) -> Option<E> {
216 match self {
217 Self::Separator { terminator_error } => Some(terminator_error),
218 _ => None,
219 }
220 }
221}
222
223/// Shared implementation for parse_separated_terminated_res and
224/// parse_separated_terminated. This exists so that we don't have to have an
225/// unnecessary bound of FromExternalError on parse_separated_terminated.
226#[inline]
227fn parse_separated_terminated_impl<
228 Input,
229 ParseOutput,
230 SepOutput,
231 TermOutput,
232 ParseErr,
233 Accum,
234 FoldErr,
235>(
236 mut parser: impl Parser<Input, ParseOutput, ParseErr>,
237 mut separator: impl Parser<Input, SepOutput, ParseErr>,
238 mut terminator: impl Parser<Input, TermOutput, ParseErr>,
239
240 mut init: impl FnMut() -> Accum,
241 mut fold: impl FnMut(Accum, ParseOutput) -> Result<Accum, FoldErr>,
242
243 mut build_error: impl FnMut(Input, FoldErr) -> ParseErr,
244) -> impl Parser<Input, Accum, ParseErr>
245where
246 Input: Clone + InputLength,
247 ParseErr: ParseError<Input>,
248{
249 move |mut input: Input| {
250 let mut accum = init();
251
252 let mut zero_length_state = ZeroLengthParseState::None;
253
254 loop {
255 // Try to find a value. To fail to do so at this point is an
256 // error, since we either just started or successfully parsed a
257 // separator.
258 //
259 // If an error occurs here, also try to attach terminator_error.
260 // terminator_error is available if the most recent separator parse
261 // was zero-length, which means that both the terminator and the
262 // item would be valid parses at this point.
263 let (tail, value) = match parser.parse(input.clone()) {
264 Ok(success) => success,
265 Err(Error(item_error)) => {
266 break Err(Error(match zero_length_state.terminator_error() {
267 None => item_error,
268 Some(terminator_error) => item_error.or(terminator_error),
269 }))
270 }
271 Err(err) => break Err(err),
272 };
273
274 // Check zero-length matches
275 zero_length_state = match (input.input_len() == tail.input_len(), zero_length_state) {
276 // If both the item and the separator had a zero length match,
277 // we're hanging. Bail.
278 //
279 // It doesn't make sense to include the terminator error here,
280 // because we *did* successfully parse a separator and an
281 // item, they just happened to be zero length
282 (true, ZeroLengthParseState::Separator { .. }) => {
283 break Err(Error(ParseErr::from_error_kind(
284 input,
285 SeparatedNonEmptyList,
286 )))
287 }
288
289 // If only the item had a zero-length match, update the
290 // state.
291 (true, _) => ZeroLengthParseState::Item,
292
293 // If the item had a non-zero length match, clear the state
294 (false, _) => ZeroLengthParseState::None,
295 };
296
297 // Advance the loop state
298 accum = fold(accum, value).map_err(|err| Error(build_error(input, err)))?;
299 input = tail;
300
301 // Try to find a terminator; if we found it, we're done. If we
302 // didn't, preserve the error, so that it can be reported as an
303 // .or() branch with the subsequent separator or item error.
304 let terminator_error = match terminator.parse(input.clone()) {
305 // We found a terminator, so we're done
306 Ok((tail, _)) => break Ok((tail, accum)),
307
308 // No terminator. Keep track of the error in case we also fail
309 // to find a separator or item.
310 Err(Error(err)) => err,
311
312 // Other kinds of errors should be returned immediately.
313 Err(err) => break Err(err),
314 };
315
316 // No terminator, so instead try to find a separator
317 let tail = match separator.parse(input.clone()) {
318 Ok((tail, _)) => tail,
319 Err(Error(separator_error)) => {
320 break Err(Error(separator_error.or(terminator_error)))
321 }
322 Err(err) => break Err(err),
323 };
324
325 // Check zero-length matches
326 zero_length_state = match (input.input_len() == tail.input_len(), zero_length_state) {
327 // If both the separator and the item had a zero length match,
328 // we're hanging. Bail.
329 (true, ZeroLengthParseState::Item) => {
330 break Err(Error(ParseErr::from_error_kind(
331 input,
332 SeparatedNonEmptyList,
333 )))
334 }
335 // If only the separator had a zero-length match, update the
336 // state. Additionally preserve the terminator error so that
337 // it can be reported as an alternate if there was an item
338 // error.
339 (true, _) => ZeroLengthParseState::Separator { terminator_error },
340
341 // If the separator had a non-zero length match, clear the
342 // state
343 (false, _) => ZeroLengthParseState::None,
344 };
345
346 // Advance the loop state
347 input = tail;
348 }
349 }
350}
351
352#[cfg(test)]
353mod test_separated_terminated {
354 use cool_asserts::assert_matches;
355 use nom::{
356 branch::alt,
357 character::complete::{alpha0, char, digit1, space0},
358 error::ErrorKind,
359 Err, IResult, Parser,
360 };
361
362 use crate::error::{BaseErrorKind, ErrorTree, Expectation};
363 use crate::parser_ext::ParserExt;
364
365 use super::parse_separated_terminated;
366
367 /// Parse a series of numbers, separated by commas, terminated by a period.
368 fn parse_number_list(input: &str) -> IResult<&str, Vec<i64>, ErrorTree<&str>> {
369 parse_separated_terminated(
370 digit1.parse_from_str(),
371 char(',').delimited_by(space0),
372 char('.').preceded_by(space0),
373 Vec::new,
374 |vec, num| express!(vec.push(num)),
375 )
376 .parse(input)
377 }
378
379 #[test]
380 fn basic() {
381 assert_eq!(
382 parse_number_list("1, 2, 3, 4, 5.").unwrap(),
383 ("", vec![1, 2, 3, 4, 5]),
384 )
385 }
386
387 #[test]
388 fn trailing_input() {
389 assert_eq!(
390 parse_number_list("1, 2, 3, 4, 5. 4, 5, 6.").unwrap(),
391 (" 4, 5, 6.", vec![1, 2, 3, 4, 5]),
392 )
393 }
394
395 #[test]
396 fn only_one() {
397 assert_eq!(parse_number_list("10.").unwrap(), ("", vec![10]),)
398 }
399
400 #[test]
401 fn at_least_one() {
402 let err = parse_number_list("abc").unwrap_err();
403
404 assert_matches!(
405 err,
406 Err::Error(ErrorTree::Base {
407 location: "abc",
408 kind: BaseErrorKind::Expected(Expectation::Digit)
409 })
410 );
411 }
412
413 /// Test that a parse failure from both the separator and the terminator
414 /// causes an error including both messages
415 #[test]
416 fn terminator_separator_miss() {
417 let err = parse_number_list("10, 20 30.").unwrap_err();
418
419 let choices = assert_matches!(err, Err::Error(ErrorTree::Alt(choices)) => choices);
420 assert_matches!(
421 choices.as_slice(),
422 [
423 ErrorTree::Base {
424 location: "30.",
425 kind: BaseErrorKind::Expected(Expectation::Char(','))
426 },
427 ErrorTree::Base {
428 location: "30.",
429 kind: BaseErrorKind::Expected(Expectation::Char('.'))
430 },
431 ]
432 );
433 }
434
435 /// Test that a terminator is required, even at EoF
436 #[test]
437 fn required_terminator() {
438 let err = parse_number_list("1, 2, 3").unwrap_err();
439
440 let choices = assert_matches!(err, Err::Error(ErrorTree::Alt(choices)) => choices);
441 assert_matches!(
442 choices.as_slice(),
443 [
444 ErrorTree::Base {
445 location: "",
446 kind: BaseErrorKind::Expected(Expectation::Char(','))
447 },
448 ErrorTree::Base {
449 location: "",
450 kind: BaseErrorKind::Expected(Expectation::Char('.'))
451 },
452 ]
453 );
454 }
455
456 /// Test that a parse failure from the item parser includes only that error
457 /// if the separator isn't zero-length
458 #[test]
459 fn item_error() {
460 let err = parse_number_list("1, 2, abc.").unwrap_err();
461
462 assert_matches!(
463 err,
464 Err::Error(ErrorTree::Base {
465 location: "abc.",
466 kind: BaseErrorKind::Expected(Expectation::Digit),
467 })
468 );
469 }
470
471 /// Parse a series of numbers ending in periods, separated by 0 or more
472 /// whitespace, terminated by a semicolon. Used to test 0-length
473 /// separator behavior.
474 fn parse_number_dot_list(input: &str) -> IResult<&str, Vec<i64>, ErrorTree<&str>> {
475 parse_separated_terminated(
476 digit1.terminated(char('.')).parse_from_str(),
477 space0,
478 char(';'),
479 Vec::new,
480 |vec, num| express!(vec.push(num)),
481 )
482 .parse(input)
483 }
484
485 #[test]
486 fn zero_length_separator() {
487 assert_eq!(
488 parse_number_dot_list("1.2. 3.4. 5.; abc").unwrap(),
489 (" abc", vec![1, 2, 3, 4, 5])
490 );
491 }
492
493 /// Test that, when a separator matches zero length, and then the item
494 /// parser fails, the returned error includes both the item error and the
495 /// terminator error.
496 #[test]
497 fn zero_length_separator_item_term_error() {
498 let err = parse_number_dot_list("1.2.3.abc.;").unwrap_err();
499
500 let choices = assert_matches!(err, Err::Error(ErrorTree::Alt(choices)) => choices);
501 assert_matches!(
502 choices.as_slice(),
503 [
504 ErrorTree::Base {
505 location: "abc.;",
506 kind: BaseErrorKind::Expected(Expectation::Digit)
507 },
508 ErrorTree::Base {
509 location: "abc.;",
510 kind: BaseErrorKind::Expected(Expectation::Char(';'))
511 },
512 ]
513 );
514 }
515
516 /// Parse a series of runs of 1 or more digits or 0 more more letters, separated by
517 /// an optional dash, terminated by a semicolon. Used to test
518 /// infinite loop detection
519 fn parse_letters_numbers(input: &str) -> IResult<&str, Vec<&str>, ErrorTree<&str>> {
520 parse_separated_terminated(
521 alt((digit1, alpha0)),
522 char('-').opt(),
523 char(';'),
524 Vec::new,
525 |vec, num| express!(vec.push(num)),
526 )
527 .parse(input)
528 }
529
530 #[test]
531 fn zero_length_item() {
532 assert_eq!(
533 parse_letters_numbers("----; abc").unwrap(),
534 (" abc", vec!["", "", "", "", ""])
535 )
536 }
537
538 #[test]
539 fn zero_length_separators() {
540 assert_eq!(
541 parse_letters_numbers("abc123abc123; abc").unwrap(),
542 (" abc", vec!["abc", "123", "abc", "123"]),
543 )
544 }
545
546 /// Test that both zero-length separators and items are allowed together,
547 /// as long as the loop makes progress
548 #[test]
549 fn zero_length_mixed() {
550 assert_eq!(
551 parse_letters_numbers("abc--123abc-123-; abc").unwrap(),
552 (" abc", vec!["abc", "", "123", "abc", "123", ""]),
553 )
554 }
555
556 /// Test that if the loop makes no progress, that's an error
557 #[test]
558 fn infinite_loop_aborts() {
559 let err = parse_letters_numbers("abc123-.; abc").unwrap_err();
560
561 assert_matches!(
562 err,
563 Err::Error(ErrorTree::Base {
564 location: ".; abc",
565 kind: BaseErrorKind::Kind(ErrorKind::SeparatedNonEmptyList)
566 })
567 );
568 }
569
570 /// Parse a series of numbers, separated by commas, terminated by optional
571 /// comma and eof. Used to test that the terminator "wins" when it and the
572 /// separator can match the same string.
573 fn parse_comma_separated(input: &str) -> IResult<&str, Vec<i64>, ErrorTree<&str>> {
574 parse_separated_terminated(
575 digit1.parse_from_str(),
576 char(','),
577 char(',').opt().all_consuming(),
578 Vec::new,
579 |vec, num| express!(vec.push(num)),
580 )
581 .parse(input)
582 }
583
584 #[test]
585 fn empty_terminator_wins() {
586 assert_eq!(
587 parse_comma_separated("1,2,3,4").unwrap(),
588 ("", vec![1, 2, 3, 4]),
589 );
590 }
591
592 #[test]
593 fn test_terminator_wins() {
594 assert_eq!(
595 parse_comma_separated("1,2,3,4,").unwrap(),
596 ("", vec![1, 2, 3, 4]),
597 )
598 }
599}