chumsky/lib.rs
1#![cfg_attr(not(any(doc, feature = "std", test)), no_std)]
2#![cfg_attr(docsrs, feature(doc_cfg), deny(rustdoc::all))]
3#![cfg_attr(
4 feature = "nightly",
5 feature(never_type, fn_traits, tuple_trait, unboxed_closures, specialization)
6)]
7#![cfg_attr(feature = "nightly", allow(incomplete_features))]
8#![doc = include_str!("../README.md")]
9#![deny(missing_docs, clippy::undocumented_unsafe_blocks)]
10// A lot of clippy's default lints are silly and annoying
11#![allow(clippy::style, clippy::useless_format, clippy::type_complexity)]
12
13extern crate alloc;
14extern crate core;
15
// Expands to the `go_emit` and `go_check` methods for a parser whose output type is `$O`.
// Each generated method simply forwards to the generic `Parser::go`, with the mode fixed to
// `Emit` or `Check` respectively. The generated signatures name `'src`, `I` and `E`, so those
// must be in scope wherever the macro is invoked.
macro_rules! go_extra {
    ( $O :ty ) => {
        #[inline(always)]
        fn go_emit(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult<Emit, $O> {
            Parser::<I, $O, E>::go::<Emit>(self, inp)
        }
        #[inline(always)]
        fn go_check(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult<Check, $O> {
            Parser::<I, $O, E>::go::<Check>(self, inp)
        }
    };
}
28
29mod blanket;
30pub mod combinator;
31pub mod container;
32#[cfg(feature = "debug")]
33pub mod debug;
34#[cfg(feature = "either")]
35mod either;
36pub mod error;
37#[cfg(feature = "extension")]
38pub mod extension;
39pub mod extra;
40#[cfg(docsrs)]
41pub mod guide;
42pub mod input;
43pub mod inspector;
44pub mod label;
45#[cfg(feature = "lexical-numbers")]
46pub mod number;
47#[cfg(feature = "pratt")]
48pub mod pratt;
49pub mod primitive;
50mod private;
51pub mod recovery;
52pub mod recursive;
53#[cfg(feature = "regex")]
54pub mod regex;
55pub mod span;
56mod stream;
57pub mod text;
58#[cfg(feature = "bytes")]
59mod tokio;
60pub mod util;
61
62/// Commonly used functions, traits and types.
63///
/// *“Listen, three eyes,” he said, “don’t you try to outweird me, I get stranger things than you free with my breakfast
/// cereal.”*
66pub mod prelude {
67 #[cfg(feature = "lexical-numbers")]
68 pub use super::number::number;
69 #[cfg(feature = "regex")]
70 pub use super::regex::regex;
71 pub use super::{
72 error::{Cheap, EmptyErr, Error as _, Rich, Simple},
73 extra,
74 input::Input,
75 primitive::{
76 any, any_ref, choice, custom, empty, end, group, just, map_ctx, none_of, one_of, set,
77 todo,
78 },
79 recovery::{nested_delimiters, skip_then_retry_until, skip_until, via_parser},
80 recursive::{recursive, Recursive},
81 span::{SimpleSpan, Span as _, SpanWrap as _, Spanned},
82 text, Boxed, ConfigIterParser, ConfigParser, IterParser, ParseResult, Parser,
83 };
84 pub use crate::{select, select_ref};
85}
86
87use crate::input::InputOwn;
88use alloc::{
89 boxed::Box,
90 rc::{self, Rc},
91 string::String,
92 vec::Vec,
93};
94#[cfg(feature = "nightly")]
95use core::marker::Tuple;
96use core::{
97 borrow::Borrow,
98 cmp::{Eq, Ord, Ordering},
99 fmt,
100 hash::Hash,
101 marker::PhantomData,
102 mem::MaybeUninit,
103 ops::{Deref, DerefMut, Range, RangeFrom},
104 panic::Location,
105 str::FromStr,
106};
107#[cfg(feature = "memoization")]
108use hashbrown::HashMap;
109#[cfg(feature = "serde")]
110use serde::{de::Visitor, Deserialize, Deserializer, Serialize, Serializer};
111
112use self::{
113 combinator::*,
114 container::*,
115 error::Error,
116 extra::ParserExtra,
117 input::{
118 BorrowInput, Emitter, ExactSizeInput, InputRef, MapExtra, SliceInput, StrInput, ValueInput,
119 },
120 inspector::Inspector,
121 label::{LabelError, Labelled, LabelledWith},
122 prelude::*,
123 primitive::Any,
124 private::{Check, Emit, IPResult, Located, MaybeUninitExt, Mode, PResult, Sealed},
125 recovery::{RecoverWith, Strategy},
126 span::{Span, WrappingSpan},
127 text::*,
128 util::{IntoMaybe, MaybeMut, MaybeRef},
129};
130#[cfg(all(feature = "extension", doc))]
131use self::{extension::v1::*, primitive::custom, stream::Stream};
132
/// A marker type that allows mentioning type parameters *without* all of the customary omission of auto traits that
/// comes with `PhantomData`.
///
/// `Send`, `Sync`, `Unpin`, `UnwindSafe` and `RefUnwindSafe` are all implemented below regardless of `T`, so the
/// mentioned parameter never decides whether a containing type gets those traits.
struct EmptyPhantom<T>(core::marker::PhantomData<T>);

impl<T> EmptyPhantom<T> {
    /// Create the marker. This is a `const fn`, so it can be used in constant contexts.
    const fn new() -> Self {
        EmptyPhantom(core::marker::PhantomData)
    }
}

impl<T> Clone for EmptyPhantom<T> {
    fn clone(&self) -> Self {
        // The type is `Copy` (see below), so cloning is just a copy.
        *self
    }
}
impl<T> Copy for EmptyPhantom<T> {}

impl<T> Unpin for EmptyPhantom<T> {}
impl<T> core::panic::RefUnwindSafe for EmptyPhantom<T> {}
impl<T> core::panic::UnwindSafe for EmptyPhantom<T> {}

// SAFETY: This is safe because `EmptyPhantom` doesn't actually contain a `T`.
unsafe impl<T> Sync for EmptyPhantom<T> {}
// SAFETY: This is safe because `EmptyPhantom` doesn't actually contain a `T`.
unsafe impl<T> Send for EmptyPhantom<T> {}
156
/// Shorthand for a [`Parser`] trait object that is valid for the lifetime `'b`.
pub(crate) type DynParser<'src, 'b, I, O, E> = dyn Parser<'src, I, O, E> + 'b;
/// Shorthand for a [`pratt::Operator`] trait object that is valid for the lifetime `'b`. Only defined when the
/// `pratt` feature is enabled.
#[cfg(feature = "pratt")]
pub(crate) type DynOperator<'src, 'b, I, O, E> = dyn pratt::Operator<'src, I, O, E> + 'b;
160
/// Labels corresponding to a variety of patterns.
///
/// The enum is marked `#[non_exhaustive]`, so code outside this crate that matches on it must include a wildcard
/// arm.
#[derive(Clone, Debug, PartialEq)]
#[non_exhaustive]
pub enum DefaultExpected<'a, T> {
    /// A specific token was expected. The token is stored as a [`MaybeRef`] tied to the lifetime `'a`.
    Token(MaybeRef<'a, T>),
    /// Anything other than the end of input was expected.
    Any,
    /// Something other than the provided input was expected.
    SomethingElse,
    /// There was no input that could have satisfied the parser.
    NothingElse,
    /// The end of input was expected.
    EndOfInput,
}
176
177impl<T> DefaultExpected<'_, T> {
178 /// Convert this [`DefaultExpected`] into an owned version of itself, cloning any inner references if required.
179 #[inline]
180 pub fn into_owned<'b>(self) -> DefaultExpected<'b, T>
181 where
182 T: Clone,
183 {
184 match self {
185 Self::Token(tok) => DefaultExpected::Token(tok.into_owned()),
186 Self::Any => DefaultExpected::Any,
187 Self::SomethingElse => DefaultExpected::SomethingElse,
188 Self::NothingElse => DefaultExpected::NothingElse,
189 Self::EndOfInput => DefaultExpected::EndOfInput,
190 }
191 }
192}
193
/// The result of performing a parse on an input with [`Parser`].
///
/// Unlike `Result`, this type is designed to express the fact that generating outputs and errors are not
/// mutually-exclusive operations: it is possible for a parse to produce non-terminal errors (see
/// [`Parser::recover_with`]) while still producing useful output.
///
/// If you don't care for recovered outputs and you wish to treat success/failure as a binary, you may use
/// [`ParseResult::into_result`].
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ParseResult<T, E> {
    output: Option<T>,
    errs: Vec<E>,
}

impl<T, E> ParseResult<T, E> {
    pub(crate) fn new(output: Option<T>, errs: Vec<E>) -> ParseResult<T, E> {
        Self { output, errs }
    }

    /// Whether this result contains output
    pub fn has_output(&self) -> bool {
        self.output().is_some()
    }

    /// Whether this result has any errors
    pub fn has_errors(&self) -> bool {
        !self.errs.is_empty()
    }

    /// Borrow the output of this result, if there is one
    pub fn output(&self) -> Option<&T> {
        self.output.as_ref()
    }

    /// Iterate over the parse errors of this result by reference. The iterator yields nothing if there were no
    /// errors.
    pub fn errors(&self) -> impl ExactSizeIterator<Item = &E> + DoubleEndedIterator {
        self.errs.iter()
    }

    /// Consume this `ParseResult`, keeping only the output (if any) and discarding the errors
    pub fn into_output(self) -> Option<T> {
        let Self { output, .. } = self;
        output
    }

    /// Consume this `ParseResult`, keeping only the errors and discarding the output. The vector is empty if there
    /// were no errors.
    pub fn into_errors(self) -> Vec<E> {
        let Self { errs, .. } = self;
        errs
    }

    /// Split this `ParseResult` into a tuple of the output, if any existed, and the errors, if any were
    /// encountered.
    pub fn into_output_errors(self) -> (Option<T>, Vec<E>) {
        let Self { output, errs } = self;
        (output, errs)
    }

    /// Convert this `ParseResult` into a standard `Result`. This discards output if parsing generated any errors,
    /// matching the old behavior of [`Parser::parse`].
    ///
    /// `Ok` is only produced when there is an output *and* no errors. If there is neither an output nor any error,
    /// the result is `Err` holding an empty vector.
    pub fn into_result(self) -> Result<T, Vec<E>> {
        match self {
            Self {
                output: Some(out),
                errs,
            } if errs.is_empty() => Ok(out),
            Self { errs, .. } => Err(errs),
        }
    }

    /// Convert this `ParseResult` into the output. If any errors were generated (including non-fatal errors!), a
    /// panic will occur instead.
    ///
    /// The use of this function is discouraged in user-facing code. However, it may be convenient for use in tests.
    ///
    /// # Panics
    ///
    /// Panics if the result contains any errors, or if it contains neither errors nor an output.
    #[track_caller]
    pub fn unwrap(self) -> T
    where
        E: fmt::Debug,
    {
        let Self { output, errs } = self;
        if !errs.is_empty() {
            panic!(
                "called `ParseResult::unwrap` on a parse result containing errors: {:?}",
                &errs
            )
        }
        output.expect("parser generated no errors or output")
    }
}
280
281/// A trait implemented by parsers.
282///
283/// Parsers take inputs of type `I`, which will implement [`Input`]. Refer to the documentation on [`Input`] for examples
284/// of common input types. It will then attempt to parse them into a value of type `O`, which may be just about any type.
285/// In doing so, they may encounter errors. These need not be fatal to the parsing process: syntactic errors can be
286/// recovered from and a valid output may still be generated alongside any syntax errors that were encountered along the
287/// way. Usually, this output comes in the form of an
288/// [Abstract Syntax Tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree) (AST).
289///
/// The final type parameter, `E`, is expected to be one of the types in the [`extra`] module,
291/// implementing [`ParserExtra`]. This trait is used to encapsulate the various types a parser
292/// uses that are not simply its input and output. Refer to the documentation on the [`ParserExtra`] trait
293/// for more detail on the contained types. If not provided, it will default to [`extra::Default`],
294/// which will have the least overhead, but also the least meaningful errors.
295///
296/// The lifetime of the parser is used for zero-copy output - the input is bound by the lifetime,
297/// and returned values or parser state may take advantage of this to borrow tokens or slices of the
298/// input and hold on to them, if the input supports this.
299///
300/// # Stability
301///
302/// This trait is not intended to be implemented by downstream users of `chumsky`. While you can technically implement
303/// it, doing so is considered to be outside the stability guarantees of the crate. Your code may break with a future,
304/// semver-compatible release! Instead of implementing this trait, you should consider other options:
305///
306/// 1) Try using combinators like [`Parser::try_map`] and [`Parser::validate`] to implement custom error generation
307///
308/// 2) Use [`custom`] to implement your own parsing logic inline within an existing parser
309///
310/// 3) Use chumsky's [`extension`] API to write an extension parser that feels like it's native to chumsky
311///
312/// 4) If you believe you've found a common use-case that's missing from chumsky, you could open a pull request to
313/// implement it in chumsky itself rather than implementing `Parser` yourself.
314// #[cfg_attr(
315// feature = "nightly",
316// diagnostic::on_unimplemented(
317// message = "The following is not a parser from `{I}` to `{O}`: `{Self}`",
318// label = "This parser is not compatible because it does not implement `Parser<{I}, {O}, E>`",
319// note = "You should check that the output types of your parsers are consistent with the combinators you're using",
320// )
321// )]
322pub trait Parser<'src, I: Input<'src>, O, E: ParserExtra<'src, I> = extra::Default> {
323 /// Generate debugging information for this parser.
324 ///
325 /// This is an unstable feature, and will likely remain so indefinitely. As such, it **does not fall inside the semver
326 /// guarantees** of the broader crate. It is intended to aid the development of parsers and should not be used as part
327 /// of production software.
328 #[cfg(feature = "debug")]
329 fn debug(&self) -> debug::DebugInfo<'_> {
330 debug::DebugInfo {
331 node_info: self.node_info(&mut Default::default()),
332 phantom: PhantomData,
333 }
334 }
335
336 #[doc(hidden)]
337 #[cfg(feature = "debug")]
338 fn node_info(&self, _scope: &mut debug::NodeScope) -> debug::NodeInfo {
339 let ty = core::any::type_name::<Self>();
340 debug::NodeInfo::Unknown(ty.split_once('<').map_or(ty, |(ty, _)| ty).to_string())
341 }
342
// Core parsing entry point, generic over the parsing mode `M`. It carries a `Self: Sized` bound, so it is
// only callable on sized parser types.
#[doc(hidden)]
fn go<M: Mode>(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult<M, O>
where
    Self: Sized;

// The same operation with the mode fixed to `Emit` and to `Check`. Unlike `go`, these have no `Self: Sized`
// bound. The `go_extra!` macro in this file generates implementations that forward to `go`.
#[doc(hidden)]
fn go_emit(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult<Emit, O>;
#[doc(hidden)]
fn go_check(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult<Check, O>;
352
353 /// Parse a stream of tokens, yielding an output if possible, and any errors encountered along the way.
354 ///
355 /// If `None` is returned (i.e: parsing failed) then there will *always* be at least one item in the error `Vec`.
356 /// If you want to include non-default state, use [`Parser::parse_with_state`] instead.
357 ///
358 /// Although the signature of this function looks complicated, it's simpler than you think! You can pass a
359 /// [`&[T]`], a [`&str`], [`Stream`], or anything implementing [`Input`] to it.
360 fn parse(&self, input: I) -> ParseResult<O, E::Error>
361 where
362 I: Input<'src>,
363 E::State: Default,
364 E::Context: Default,
365 {
366 self.parse_with_state(input, &mut E::State::default())
367 }
368
369 /// Parse a stream of tokens, yielding an output if possible, and any errors encountered along the way.
370 /// The provided state will be passed on to parsers that expect it, such as [`map_with`](Parser::map_with).
371 ///
372 /// If `None` is returned (i.e: parsing failed) then there will *always* be at least one item in the error `Vec`.
373 /// If you want to just use a default state value, use [`Parser::parse`] instead.
374 ///
375 /// Although the signature of this function looks complicated, it's simpler than you think! You can pass a
376 /// [`&[T]`], a [`&str`], [`Stream`], or anything implementing [`Input`] to it.
fn parse_with_state(&self, input: I, state: &mut E::State) -> ParseResult<O, E::Error>
where
    I: Input<'src>,
    E::Context: Default,
{
    // Bundle the input with the caller's state, then obtain the `InputRef` that parsers operate on.
    let mut own = InputOwn::new_state(input, state);
    let mut inp = own.as_ref_start();
    // Run this parser followed by `end()`, in `Emit` mode so that an output value is produced.
    let res = self.then_ignore(end()).go::<Emit>(&mut inp);
    // Pick up the alternative error recorded on `inp`, if there is one. Otherwise build a placeholder
    // `expected_found` error with no expected patterns and no found token, spanning from the current cursor.
    // This is evaluated whether or not the parse succeeded, but it is only reported on failure (below).
    let alt = inp.take_alt().map(|alt| alt.err).unwrap_or_else(|| {
        let fake_span = inp.span_since(&inp.cursor());
        // TODO: Why is this needed?
        E::Error::expected_found([], None, fake_span)
    });
    // Take the errors held by `own`; these are returned even when the parse succeeds.
    let mut errs = own.into_errs();
    let out = match res {
        Ok(out) => Some(out),
        Err(()) => {
            // A failed parse always reports `alt`, so the error list is never empty when there is no output.
            errs.push(alt);
            None
        }
    };
    ParseResult::new(out, errs)
}
400
401 /// Parse a stream of tokens, ignoring any output, and returning any errors encountered along the way.
402 ///
403 /// If parsing failed, then there will *always* be at least one item in the returned `Vec`.
404 /// If you want to include non-default state, use [`Parser::check_with_state`] instead.
405 ///
406 /// Although the signature of this function looks complicated, it's simpler than you think! You can pass a
407 /// [`&[T]`], a [`&str`], [`Stream`], or anything implementing [`Input`] to it.
408 fn check(&self, input: I) -> ParseResult<(), E::Error>
409 where
410 Self: Sized,
411 I: Input<'src>,
412 E::State: Default,
413 E::Context: Default,
414 {
415 self.check_with_state(input, &mut E::State::default())
416 }
417
418 /// Parse a stream of tokens, ignoring any output, and returning any errors encountered along the way.
419 ///
420 /// If parsing failed, then there will *always* be at least one item in the returned `Vec`.
421 /// If you want to just use a default state value, use [`Parser::check`] instead.
422 ///
423 /// Although the signature of this function looks complicated, it's simpler than you think! You can pass a
424 /// [`&[T]`], a [`&str`], [`Stream`], or anything implementing [`Input`] to it.
fn check_with_state(&self, input: I, state: &mut E::State) -> ParseResult<(), E::Error>
where
    Self: Sized,
    I: Input<'src>,
    E::Context: Default,
{
    // Bundle the input with the caller's state, then obtain the `InputRef` that parsers operate on.
    let mut own = InputOwn::new_state(input, state);
    let mut inp = own.as_ref_start();
    // Run this parser followed by `end()`, in `Check` mode: success carries `()` rather than an output value.
    let res = self.then_ignore(end()).go::<Check>(&mut inp);
    // Pick up the alternative error recorded on `inp`, if there is one. Otherwise build a placeholder
    // `expected_found` error with no expected patterns and no found token, spanning from the current cursor.
    // This is evaluated whether or not the parse succeeded, but it is only reported on failure (below).
    let alt = inp.take_alt().map(|alt| alt.err).unwrap_or_else(|| {
        let fake_span = inp.span_since(&inp.cursor());
        // TODO: Why is this needed?
        E::Error::expected_found([], None, fake_span)
    });
    // Take the errors held by `own`; these are returned even when the parse succeeds.
    let mut errs = own.into_errs();
    let out = match res {
        Ok(()) => Some(()),
        Err(()) => {
            // A failed parse always reports `alt`, so the error list is never empty when there is no output.
            errs.push(alt);
            None
        }
    };
    ParseResult::new(out, errs)
}
449
450 /// Convert the output of this parser into a slice of the input, based on the current parser's
451 /// span.
452 ///
453 /// Note: unlike the parser `.repeated().collect()`, this method includes all tokens that are
454 /// "ignored" by the parser, including any padding, separators, and sub-parsers with
455 /// [`Parser::ignored`], [`Parser::ignore_then`], and [`Parser::then_ignore`].
456 ///
457 /// # Examples
458 /// Example with input of type `&str` (token type is `char`).
459 /// ```
460 /// # use chumsky::prelude::*;
461 /// // Matches a number with underscores that is surrounded by apostrophes.
462 /// let quoted_numeric = any::<&str, extra::Err<Simple<char>>>()
463 /// .filter(|c: &char| c.is_digit(10))
464 /// .separated_by(just("_").repeated().at_most(1))
465 /// .to_slice()
466 /// .padded_by(just("'"));
467 /// assert_eq!(quoted_numeric.parse("'1_23'").into_result(), Ok("1_23"));
468 /// ```
469 /// Example with input of type `&[u32]` (token type is `u32`).
470 /// ```
471 /// # use chumsky::prelude::*;
472 /// // Matches even numbers, then ignoring the rest of the input when an odd number is reached.
473 /// let even_matcher = any::<&[u32], extra::Err<Simple<u32>>>()
474 /// .filter(|c: &u32| c % 2 == 0)
475 /// .repeated()
476 /// .to_slice()
477 /// .lazy();
478 /// assert_eq!(even_matcher.parse(&[2, 4, 8, 5, 6]).unwrap(), &[2, 4, 8]);
479 /// ```
480 fn to_slice(self) -> ToSlice<Self, O>
481 where
482 Self: Sized,
483 {
484 ToSlice {
485 parser: self,
486 phantom: EmptyPhantom::new(),
487 }
488 }
489
490 /// Filter the output of this parser, accepting only inputs that match the given predicate.
491 ///
/// The output type of this parser is `O`, the same as the output of the original parser.
493 ///
494 /// # Examples
495 ///
496 /// ```
497 /// # use chumsky::{prelude::*, error::Simple};
498 /// let lowercase = any::<_, extra::Err<Simple<char>>>()
499 /// .filter(char::is_ascii_lowercase)
500 /// .repeated()
501 /// .at_least(1)
502 /// .collect::<String>();
503 ///
504 /// assert_eq!(lowercase.parse("hello").into_result(), Ok("hello".to_string()));
505 /// assert!(lowercase.parse("Hello").has_errors());
506 /// ```
507 fn filter<F: Fn(&O) -> bool>(self, f: F) -> Filter<Self, F>
508 where
509 Self: Sized,
510 {
511 Filter {
512 parser: self,
513 filter: f,
514 }
515 }
516
517 /// Filter and map the output of this parser, accepting only inputs that get mapped to `Some`.
518 ///
519 /// The output type of this parser is `U`.
520 ///
521 /// # Examples
522 ///
523 /// ```
524 /// # use chumsky::{prelude::*, error::Simple};
525 /// #[derive(Debug, PartialEq)]
526 /// enum Token {
527 /// Digit(char), // invariant: .is_ascii_digit()
528 /// Alpha(char), // invariant: .is_alphabetic()
529 /// }
530 ///
531 /// let token = any::<_, extra::Err<Simple<char>>>()
532 /// .filter_map(|c: char| if c.is_ascii_digit() {
533 /// Some(Token::Digit(c))
534 /// } else if c.is_alphabetic() {
535 /// Some(Token::Alpha(c))
536 /// } else {
537 /// None
538 /// });
539 ///
540 /// assert_eq!(token.parse("x").into_result(), Ok(Token::Alpha('x')));
541 /// assert_eq!(token.parse("5").into_result(), Ok(Token::Digit('5')));
542 /// assert!(token.parse("!").has_errors());
543 /// ```
544 fn filter_map<U, F: Fn(O) -> Option<U>>(self, f: F) -> FilterMap<Self, O, F>
545 where
546 Self: Sized,
547 {
548 FilterMap {
549 parser: self,
550 filter_mapper: f,
551 phantom: EmptyPhantom::new(),
552 }
553 }
554
555 /// Map the output of this parser to another value.
556 ///
557 /// The output type of this parser is `U`, the same as the function's output.
558 ///
559 /// # Examples
560 ///
561 /// ```
562 /// # use chumsky::{prelude::*, error::Simple};
563 /// #[derive(Debug, PartialEq)]
564 /// enum Token { Word(String), Num(u64) }
565 ///
566 /// let word = any::<_, extra::Err<Simple<char>>>()
567 /// .filter(|c: &char| c.is_alphabetic())
568 /// .repeated().at_least(1)
569 /// .collect::<String>()
570 /// .map(Token::Word);
571 ///
572 /// let num = any::<_, extra::Err<Simple<char>>>()
573 /// .filter(|c: &char| c.is_ascii_digit())
574 /// .repeated().at_least(1)
575 /// .collect::<String>()
576 /// .map(|s| Token::Num(s.parse().unwrap()));
577 ///
578 /// let token = word.or(num);
579 ///
580 /// assert_eq!(token.parse("test").into_result(), Ok(Token::Word("test".to_string())));
581 /// assert_eq!(token.parse("42").into_result(), Ok(Token::Num(42)));
582 /// ```
583 fn map<U, F: Fn(O) -> U>(self, f: F) -> Map<Self, O, F>
584 where
585 Self: Sized,
586 {
587 Map {
588 parser: self,
589 mapper: f,
590 phantom: EmptyPhantom::new(),
591 }
592 }
593
594 /// Map the output of this parser to another value, with the opportunity to get extra metadata from the parse like the span or parser state.
595 ///
596 /// See the docs for [`MapExtra`] for examples of metadata that can be fetched.
597 ///
598 /// The output type of this parser is `U`, the same as the function's output.
599 ///
600 /// # Examples
601 ///
602 /// Using the span of the output in the mapping function:
603 ///
604 /// ```
605 /// # use chumsky::prelude::*;
606 ///
607 /// // It's common for AST nodes to use a wrapper type that allows attaching span information to them
608 /// #[derive(Debug, PartialEq)]
609 /// pub struct Spanned<T>(T, SimpleSpan<usize>);
610 ///
611 /// let ident = text::ascii::ident::<_, extra::Err<Simple<char>>>()
612 /// .map_with(|ident, e| Spanned(ident, e.span())) // Equivalent to `.map_with_span(|ident, span| Spanned(ident, span))`
613 /// .padded();
614 ///
615 /// assert_eq!(ident.parse("hello").into_result(), Ok(Spanned("hello", (0..5).into())));
/// assert_eq!(ident.parse("       hello   ").into_result(), Ok(Spanned("hello", (7..12).into())));
617 /// ```
618 ///
619 /// Using the parser state in the mapping function to intern strings:
620 ///
621 /// ```
622 /// # use chumsky::prelude::*;
623 /// use std::ops::Range;
624 /// use lasso::{Rodeo, Spur};
625 ///
626 /// // It's common for AST nodes to use interned versions of identifiers
627 /// // Keys are generally smaller, faster to compare, and can be `Copy`
628 /// #[derive(Copy, Clone)]
629 /// pub struct Ident(Spur);
630 ///
631 /// let ident = text::ascii::ident::<_, extra::Full<Simple<char>, extra::SimpleState<Rodeo>, ()>>()
632 /// .map_with(|ident, e| Ident(e.state().get_or_intern(ident)))
633 /// .padded()
634 /// .repeated()
635 /// .at_least(1)
636 /// .collect::<Vec<_>>();
637 ///
638 /// // Test out parser
639 ///
640 /// let mut interner = extra::SimpleState(Rodeo::new());
641 ///
642 /// match ident.parse_with_state("hello", &mut interner).into_result() {
643 /// Ok(idents) => {
644 /// assert_eq!(interner.resolve(&idents[0].0), "hello");
645 /// }
646 /// Err(e) => panic!("Parsing Failed: {:?}", e),
647 /// }
648 ///
649 /// match ident.parse_with_state("hello hello", &mut interner).into_result() {
650 /// Ok(idents) => {
651 /// assert_eq!(idents[0].0, idents[1].0);
652 /// }
653 /// Err(e) => panic!("Parsing Failed: {:?}", e),
654 /// }
655 /// ```
656 ///
657 /// Using the parse context in the mapping function:
658 ///
659 /// ```
660 /// # use chumsky::{prelude::*, error::Simple};
661 ///
662 /// fn palindrome_parser<'src>() -> impl Parser<'src, &'src str, String> {
663 /// recursive(|chain| {
664 /// choice((
665 /// just(String::new())
666 /// .configure(|cfg, ctx: &String| cfg.seq(ctx.clone()))
667 /// .then_ignore(end()),
668 /// any()
669 /// .map_with(|x, e| format!("{x}{}", e.ctx()))
670 /// .ignore_with_ctx(chain),
671 /// ))
672 /// })
673 /// .with_ctx(String::new())
674 /// }
675 ///
676 /// assert_eq!(palindrome_parser().parse("abccba").into_result().as_deref(), Ok("cba"));
677 /// assert_eq!(palindrome_parser().parse("hello olleh").into_result().as_deref(), Ok(" olleh"));
678 /// assert!(palindrome_parser().parse("abccb").into_result().is_err());
679 /// ```
680 fn map_with<U, F: Fn(O, &mut MapExtra<'src, '_, I, E>) -> U>(self, f: F) -> MapWith<Self, O, F>
681 where
682 Self: Sized,
683 {
684 MapWith {
685 parser: self,
686 mapper: f,
687 phantom: EmptyPhantom::new(),
688 }
689 }
690
691 /// Map the output of this parser to another value.
692 /// If the output of this parser isn't a tuple, use [`Parser::map`].
693 ///
694 /// The output type of this parser is `U`, the same as the function's output.
695 ///
696 /// # Examples
697 ///
698 /// ```
699 /// # use chumsky::prelude::*;
700 /// #[derive(Clone, Copy, Debug, PartialEq, Eq)]
701 /// pub enum Value {
702 /// One(u8),
703 /// Two(u8, u8),
704 /// Three(u8, u8, u8),
705 /// }
706 ///
707 /// fn parser<'src>() -> impl Parser<'src, &'src [u8], Vec<Value>> {
708 /// choice((
709 /// just(1).ignore_then(any()).map(Value::One),
710 /// just(2)
711 /// .ignore_then(group((any(), any())))
712 /// .map_group(Value::Two),
713 /// just(3)
714 /// .ignore_then(group((any(), any(), any())))
715 /// .map_group(Value::Three),
716 /// ))
717 /// .repeated()
718 /// .collect()
719 /// }
720 ///
721 /// let bytes = &[3, 1, 2, 3, 1, 127, 2, 21, 69];
722 /// assert_eq!(
723 /// parser().parse(bytes).into_result(),
724 /// Ok(vec![
725 /// Value::Three(1, 2, 3),
726 /// Value::One(127),
727 /// Value::Two(21, 69)
728 /// ])
729 /// );
730 /// ```
731 #[cfg(feature = "nightly")]
732 fn map_group<F: Fn<O>>(self, f: F) -> MapGroup<Self, O, F>
733 where
734 Self: Sized,
735 O: Tuple,
736 {
737 MapGroup {
738 parser: self,
739 mapper: f,
740 phantom: EmptyPhantom::new(),
741 }
742 }
743
744 /// Transform the output of this parser to the pattern's span.
745 ///
746 /// This is commonly used when you know what pattern you've parsed and are only interested in the span of the
747 /// pattern.
748 ///
749 /// The output type of this parser is `I::Span`.
750 ///
751 /// # Examples
752 ///
753 /// ```
754 /// # use chumsky::prelude::*;
755 ///
756 /// // It's common for AST nodes to use a wrapper type that allows attaching span information to them
757 /// #[derive(Debug, PartialEq)]
758 /// pub enum Expr<'src> {
759 /// Int(&'src str, SimpleSpan),
760 /// // The span is that of the operator, '+'
761 /// Add(Box<Expr<'src>>, SimpleSpan, Box<Expr<'src>>),
762 /// }
763 ///
764 /// let int = text::int::<_, extra::Err<Simple<char>>>(10)
765 /// .to_slice()
766 /// .map_with(|int, e| Expr::Int(int, e.span()))
767 /// .padded();
768 ///
769 /// let add_op = just('+').to_span().padded();
770 /// let sum = int.foldl(
771 /// add_op.then(int).repeated(),
772 /// |a, (op_span, b)| Expr::Add(Box::new(a), op_span, Box::new(b)),
773 /// );
774 ///
775 /// assert_eq!(sum.parse("42 + 7 + 13").into_result(), Ok(Expr::Add(
776 /// Box::new(Expr::Add(
777 /// Box::new(Expr::Int("42", (0..2).into())),
778 /// (3..4).into(),
779 /// Box::new(Expr::Int("7", (5..6).into())),
780 /// )),
781 /// (7..8).into(),
782 /// Box::new(Expr::Int("13", (9..11).into())),
783 /// )));
784 /// ```
785 fn to_span(self) -> ToSpan<Self, O>
786 where
787 Self: Sized,
788 {
789 ToSpan {
790 parser: self,
791 phantom: EmptyPhantom::new(),
792 }
793 }
/// Left-fold the output of the parser into a single value, possibly failing during the reduction.
/// The parser only consumes input from the inner parser until it either completes or the reduction
/// step fails ("short-circuiting").
///
/// The output type of this parser is `O`, the output type of the original parser, which is used as the accumulator.
799 ///
800 /// # Examples
801 ///
802 /// ```
803 /// # use chumsky::{prelude::*, error::Simple};
804 /// let int = text::int::<_, extra::Err<Simple<char>>>(10)
805 /// .from_str::<u8>()
806 /// .unwrapped();
807 ///
808 /// let sum = int
809 /// .clone()
810 /// .try_foldl(just('+').ignore_then(int).repeated(), |a, b, e| a.checked_add(b).ok_or(Simple::new(None, e.span())));
811 ///
812 /// assert_eq!(sum.parse("1+12+3+9").into_result(), Ok(25));
813 /// assert_eq!(sum.parse("6").into_result(), Ok(6));
814 /// assert!(sum.parse("255+1").has_errors()); // due to u8 overflow
815 /// ```
816 #[cfg_attr(debug_assertions, track_caller)]
817 fn try_foldl<B, F, OB>(self, other: B, f: F) -> TryFoldl<F, Self, B, OB, E>
818 where
819 F: Fn(O, OB, &mut MapExtra<'src, '_, I, E>) -> Result<O, E::Error>,
820 B: IterParser<'src, I, OB, E>,
821 Self: Sized,
822 {
823 TryFoldl {
824 parser_a: self,
825 parser_b: other,
826 folder: f,
827 phantom: EmptyPhantom::new(),
828 }
829 }
830
831 /// Wrap the output of this parser in the pattern's span.
832 ///
833 /// This is often used to preserve the span of AST nodes for error generation by future passes.
834 ///
835 /// The output type of this parser is `<I::Span as WrappingSpan>::Spanned<O>`. For parsers using [`SimpleSpan`],
836 /// that means the output type is [`Spanned<O, SimpleSpan>`].
837 fn spanned(self) -> combinator::Spanned<Self, O>
838 where
839 Self: Sized,
840 {
841 combinator::Spanned {
842 parser: self,
843 phantom: EmptyPhantom::new(),
844 }
845 }
846
847 /// After a successful parse, apply a fallible function to the output. If the function produces an error, treat it
848 /// as a parsing error.
849 ///
850 /// If you wish parsing of this pattern to continue when an error is generated instead of halting, consider using
851 /// [`Parser::validate`] instead.
852 ///
853 /// The output type of this parser is `U`, the [`Ok`] return value of the function.
854 ///
855 /// # Examples
856 ///
857 /// ```
858 /// # use chumsky::prelude::*;
859 /// let byte = text::int::<_, extra::Err<Rich<char>>>(10)
860 /// .try_map(|s: &str, span| s
861 /// .parse::<u8>()
862 /// .map_err(|e| Rich::custom(span, e.to_string())));
863 ///
864 /// assert!(byte.parse("255").has_output());
865 /// assert!(byte.parse("256").has_errors()); // Out of range
866 /// ```
867 #[doc(alias = "filter_map")]
868 fn try_map<U, F: Fn(O, I::Span) -> Result<U, E::Error>>(self, f: F) -> TryMap<Self, O, F>
869 where
870 Self: Sized,
871 {
872 TryMap {
873 parser: self,
874 mapper: f,
875 phantom: EmptyPhantom::new(),
876 }
877 }
878
879 /// After a successful parse, apply a fallible function to the output, with the opportunity to get extra metadata.
880 /// If the function produces an error, treat it as a parsing error.
881 ///
882 /// If you wish parsing of this pattern to continue when an error is generated instead of halting, consider using
883 /// [`Parser::validate`] instead.
884 ///
885 /// The output type of this parser is `U`, the [`Ok`] return value of the function.
886 fn try_map_with<U, F: Fn(O, &mut MapExtra<'src, '_, I, E>) -> Result<U, E::Error>>(
887 self,
888 f: F,
889 ) -> TryMapWith<Self, O, F>
890 where
891 Self: Sized,
892 {
893 TryMapWith {
894 parser: self,
895 mapper: f,
896 phantom: EmptyPhantom::new(),
897 }
898 }
899
900 /// Ignore the output of this parser, yielding `()` as an output instead.
901 ///
902 /// This can be used to reduce the cost of parsing by avoiding unnecessary allocations (most collections containing
903 /// [ZSTs](https://doc.rust-lang.org/nomicon/exotic-sizes.html#zero-sized-types-zsts)
904 /// [do not allocate](https://doc.rust-lang.org/std/vec/struct.Vec.html#guarantees)). For example, it's common to
905 /// want to ignore whitespace in many grammars (see [`text::whitespace`]).
906 ///
907 /// The output type of this parser is `()`.
908 ///
909 /// # Examples
910 ///
911 /// ```
912 /// # use chumsky::{prelude::*, error::Simple};
913 /// // A parser that parses any number of whitespace characters without allocating
914 /// let whitespace = any::<_, extra::Err<Simple<char>>>()
915 /// .filter(|c: &char| c.is_whitespace())
916 /// .ignored()
917 /// .repeated()
918 /// .collect::<Vec<_>>();
919 ///
920 /// assert_eq!(whitespace.parse(" ").into_result(), Ok(vec![(); 4]));
921 /// assert!(whitespace.parse(" hello").has_errors());
922 /// ```
923 fn ignored(self) -> Ignored<Self, O>
924 where
925 Self: Sized,
926 {
927 Ignored {
928 parser: self,
929 phantom: EmptyPhantom::new(),
930 }
931 }
932
/// Memoize this parser, so that a later attempt to parse the same input 'remembers' the earlier attempt and exits
/// early.
///
/// When certain inputs trigger exponential behavior in your parser, strategically memoizing a
/// ['garden path'](https://en.wikipedia.org/wiki/Garden-path_sentence) rule is often an effective remedy. At the
/// limit, applying memoization to all combinators will turn any parser into one with `O(n)`, albeit with very
/// significant per-element overhead and high memory usage.
///
/// Memoization also works with recursion, so this can be used to write parsers using
/// [left recursion](https://en.wikipedia.org/wiki/Left_recursion).
// TODO: Example
#[cfg(feature = "memoization")]
fn memoized(self) -> Memoized<Self>
where
    Self: Sized,
{
    let parser = self;
    Memoized { parser }
}
950
951 /// Transform all outputs of this parser to a predetermined value.
952 ///
953 /// The output type of this parser is `U`, the type of the predetermined value.
954 ///
955 /// # Examples
956 ///
957 /// ```
958 /// # use chumsky::{prelude::*, error::Simple};
959 /// #[derive(Clone, Debug, PartialEq)]
960 /// enum Op { Add, Sub, Mul, Div }
961 ///
962 /// let op = just::<_, _, extra::Err<Simple<char>>>('+').to(Op::Add)
963 /// .or(just('-').to(Op::Sub))
964 /// .or(just('*').to(Op::Mul))
965 /// .or(just('/').to(Op::Div));
966 ///
967 /// assert_eq!(op.parse("+").into_result(), Ok(Op::Add));
968 /// assert_eq!(op.parse("/").into_result(), Ok(Op::Div));
969 /// ```
970 fn to<U: Clone>(self, to: U) -> To<Self, O, U>
971 where
972 Self: Sized,
973 {
974 To {
975 parser: self,
976 to,
977 phantom: EmptyPhantom::new(),
978 }
979 }
980
981 /// Label this parser with the given label.
982 ///
983 /// Labelling a parser makes all errors generated by the parser refer to the label rather than any sub-elements
984 /// within the parser. For example, labelling a parser for an expression would yield "expected expression" errors
985 /// rather than "expected integer, string, binary op, etc." errors.
986 ///
987 /// Note that this overrides any error messages manually specified through combinators like [`Self::try_map_with`]
988 // TODO: Example
989 fn labelled<L>(self, label: L) -> Labelled<Self, L>
990 where
991 Self: Sized,
992 E::Error: LabelError<'src, I, L>,
993 {
994 Labelled {
995 parser: self,
996 label,
997 is_context: false,
998 debug_info_override: Default::default(),
999 }
1000 }
1001
1002 /// Label this parser with a generated label, with the opportunity to get extra metadata from the parse like the
1003 /// span or parser state.
1004 ///
1005 /// Labelling a parser makes all errors generated by the parser refer to the label rather than any sub-elements
1006 /// within the parser. For example, labelling a parser for an expression would yield "expected expression" errors
1007 /// rather than "expected integer, string, binary op, etc." errors.
1008 fn labelled_with<L, F>(self, label: F) -> LabelledWith<Self, L, F>
1009 where
1010 Self: Sized,
1011 E::Error: LabelError<'src, I, L>,
1012 F: Fn() -> L,
1013 {
1014 LabelledWith {
1015 parser: self,
1016 label,
1017 is_context: false,
1018 debug_info_override: Default::default(),
1019 phantom: PhantomData,
1020 }
1021 }
1022
1023 /// Parse one thing and then another thing, yielding a tuple of the two outputs.
1024 ///
1025 /// The output type of this parser is `(O, U)`, a combination of the outputs of both parsers.
1026 ///
1027 /// If you instead only need the output of __one__ of the parsers, use [`ignore_then`](Self::ignore_then)
1028 /// or [`then_ignore`](Self::then_ignore).
1029 ///
1030 /// # Examples
1031 ///
1032 /// ```
1033 /// # use chumsky::{prelude::*, error::Simple};
1034 /// let word = any::<_, extra::Err<Simple<char>>>()
1035 /// .filter(|c: &char| c.is_alphabetic())
1036 /// .repeated()
1037 /// .at_least(1)
1038 /// .collect::<String>();
1039 /// let two_words = word.then_ignore(just(' ')).then(word);
1040 ///
1041 /// assert_eq!(two_words.parse("dog cat").into_result(), Ok(("dog".to_string(), "cat".to_string())));
1042 /// assert!(two_words.parse("hedgehog").has_errors());
1043 /// ```
1044 fn then<U, B: Parser<'src, I, U, E>>(self, other: B) -> Then<Self, B, O, U, E>
1045 where
1046 Self: Sized,
1047 {
1048 Then {
1049 parser_a: self,
1050 parser_b: other,
1051 phantom: EmptyPhantom::new(),
1052 }
1053 }
1054
1055 /// Parse one thing and then another thing, yielding only the output of the latter.
1056 ///
1057 /// The output type of this parser is `U`, the same as the second parser.
1058 ///
1059 /// If you instead only need the output of the first parser, use [`then_ignore`](Self::then_ignore).
1060 /// If you need the output of __both__ parsers, use [`then`](Self::then).
1061 ///
1062 /// # Examples
1063 ///
1064 /// ```
1065 /// # use chumsky::{prelude::*, error::Simple};
1066 /// let zeroes = any::<_, extra::Err<Simple<char>>>().filter(|c: &char| *c == '0').ignored().repeated().collect::<Vec<_>>();
1067 /// let digits = any().filter(|c: &char| c.is_ascii_digit())
1068 /// .repeated()
1069 /// .collect::<String>();
1070 /// let integer = zeroes
1071 /// .ignore_then(digits)
1072 /// .from_str()
1073 /// .unwrapped();
1074 ///
1075 /// assert_eq!(integer.parse("00064").into_result(), Ok(64));
1076 /// assert_eq!(integer.parse("32").into_result(), Ok(32));
1077 /// ```
1078 fn ignore_then<U, B: Parser<'src, I, U, E>>(self, other: B) -> IgnoreThen<Self, B, O, E>
1079 where
1080 Self: Sized,
1081 {
1082 IgnoreThen {
1083 parser_a: self,
1084 parser_b: other,
1085 phantom: EmptyPhantom::new(),
1086 }
1087 }
1088
1089 /// Parse one thing and then another thing, yielding only the output of the former.
1090 ///
1091 /// The output type of this parser is `O`, the same as the original parser.
1092 ///
1093 /// If you instead only need the output of the second parser, use [`ignore_then`](Self::ignore_then).
1094 /// If you need the output of __both__ parsers, use [`then`](Self::then).
1095 ///
1096 /// # Examples
1097 ///
1098 /// ```
1099 /// # use chumsky::{prelude::*, error::Simple};
1100 /// let word = any::<_, extra::Err<Simple<char>>>()
1101 /// .filter(|c: &char| c.is_alphabetic())
1102 /// .repeated()
1103 /// .at_least(1)
1104 /// .collect::<String>();
1105 ///
1106 /// let punctuated = word
1107 /// .then_ignore(just('!').or(just('?')).or_not());
1108 ///
1109 /// let sentence = punctuated
1110 /// .padded() // Allow for whitespace gaps
1111 /// .repeated()
1112 /// .collect::<Vec<_>>();
1113 ///
1114 /// assert_eq!(
1115 /// sentence.parse("hello! how are you?").into_result(),
1116 /// Ok(vec![
1117 /// "hello".to_string(),
1118 /// "how".to_string(),
1119 /// "are".to_string(),
1120 /// "you".to_string(),
1121 /// ]),
1122 /// );
1123 /// ```
1124 fn then_ignore<U, B: Parser<'src, I, U, E>>(self, other: B) -> ThenIgnore<Self, B, U, E>
1125 where
1126 Self: Sized,
1127 {
1128 ThenIgnore {
1129 parser_a: self,
1130 parser_b: other,
1131 phantom: EmptyPhantom::new(),
1132 }
1133 }
1134
1135 /// Parse input as part of a token-tree - using an input generated from within the current
1136 /// input. In other words, this parser will attempt to create a *new* input stream from within
1137 /// the one it is being run on, and the parser it was called on will be provided this *new* input.
1138 /// By default, the original parser is expected to consume up to the end of the new stream. To
1139 /// allow only consuming part of the stream, use [`Parser::lazy`] to ignore trailing tokens.
1140 ///
1141 /// The provided parser `P` is expected to have both an input and output type which match the input
1142 /// type of the parser it is called on. As an example, if the original parser takes an input of
1143 /// `Stream<Iterator<Item = T>>`, `P` will be run first against that input, and is expected to
1144 /// output a new `Stream<Iterator<Item = T>>` which the original parser will be run against.
1145 ///
1146 /// The output of this parser is `O`, the output of the parser it is called on.
1147 ///
1148 /// # Examples
1149 ///
1150 /// ```
1151 /// # use chumsky::{prelude::*, util::MaybeRef, error::Simple};
1152 /// #[derive(Debug, Clone, PartialEq)]
1153 /// enum Token<'src> {
1154 /// Struct,
1155 /// Ident(&'src str),
1156 /// Item(&'src str),
1157 /// Group(Vec<Token<'src>>),
1158 /// }
1159 ///
1160 /// let group = select_ref! { Token::Group(g) => g.as_slice() };
1161 ///
1162 /// let ident = select_ref! { Token::Ident(i) => *i };
1163 ///
1164 /// let items = select_ref! { Token::Item(i) => *i }
1165 /// .repeated()
1166 /// .collect::<Vec<_>>()
1167 /// .nested_in(group);
1168 ///
1169 /// let struc = just::<_, _, extra::Err<Simple<_>>>(&Token::Struct)
1170 /// .ignore_then(ident)
1171 /// .then(items);
1172 ///
1173 /// let tl = struc
1174 /// .repeated()
1175 /// .collect::<Vec<_>>();
1176 ///
1177 /// let tokens = [
1178 /// Token::Struct,
1179 /// Token::Ident("foo"),
1180 /// Token::Group(vec![
1181 /// Token::Item("a"),
1182 /// Token::Item("b"),
1183 /// ]),
1184 /// ];
1185 ///
1186 /// assert_eq!(tl.parse(&tokens).into_result(), Ok(vec![("foo", vec!["a", "b"])]));
1187 /// ```
1188 fn nested_in<B: Parser<'src, J, I, F>, J, F>(self, other: B) -> NestedIn<Self, B, I, O, F>
1189 where
1190 Self: Sized,
1191 I: 'src,
1192 J: Input<'src>,
1193 F: ParserExtra<'src, J>,
1194 {
1195 NestedIn {
1196 parser_a: self,
1197 parser_b: other,
1198 phantom: EmptyPhantom::new(),
1199 }
1200 }
1201
1202 /// Parse one thing and then another thing, creating the second parser from the result of
1203 /// the first. If you do need the context in the output, use [`Parser::then_with_ctx`].
1204 ///
1205 /// The output of this parser is `U`, the result of the second parser
1206 ///
1207 /// Error recovery for this parser may be sub-optimal, as if the first parser succeeds on
1208 /// recovery then the second produces an error, the primary error will point to the location in
1209 /// the second parser which failed, ignoring that the first parser may be the root cause. There
1210 /// may be other pathological errors cases as well.
1211 ///
1212 /// # Examples
1213 ///
1214 /// ```
1215 /// # use chumsky::{prelude::*, error::Simple};
1216 /// let successor = just(b'\0').configure(|cfg, ctx: &u8| cfg.seq(*ctx + 1));
1217 ///
1218 /// // A parser that parses a single letter and then its successor
1219 /// let successive_letters = one_of::<_, _, extra::Err<Simple<u8>>>(b'a'..=b'z')
1220 /// .ignore_with_ctx(successor);
1221 ///
1222 /// assert_eq!(successive_letters.parse(b"ab").into_result(), Ok(b'b')); // 'b' follows 'a'
1223 /// assert!(successive_letters.parse(b"ac").has_errors()); // 'c' does not follow 'a'
1224 /// ```
1225 fn ignore_with_ctx<U, P>(
1226 self,
1227 then: P,
1228 ) -> IgnoreWithCtx<Self, P, O, I, extra::Full<E::Error, E::State, O>>
1229 where
1230 Self: Sized,
1231 O: 'src,
1232 P: Parser<'src, I, U, extra::Full<E::Error, E::State, O>>,
1233 {
1234 IgnoreWithCtx {
1235 parser: self,
1236 then,
1237 phantom: EmptyPhantom::new(),
1238 }
1239 }
1240
1241 /// Parse one thing and then another thing, creating the second parser from the result of
1242 /// the first. If you don't need the context in the output, prefer [`Parser::ignore_with_ctx`].
1243 ///
1244 /// The output of this parser is `(E::Context, O)`,
1245 /// a combination of the context and the output of the parser.
1246 ///
1247 /// Error recovery for this parser may be sub-optimal, as if the first parser succeeds on
1248 /// recovery then the second produces an error, the primary error will point to the location in
1249 /// the second parser which failed, ignoring that the first parser may be the root cause. There
1250 /// may be other pathological errors cases as well.
1251 fn then_with_ctx<U, P>(
1252 self,
1253 then: P,
1254 ) -> ThenWithCtx<Self, P, O, I, extra::Full<E::Error, E::State, O>>
1255 where
1256 Self: Sized,
1257 O: 'src,
1258 P: Parser<'src, I, U, extra::Full<E::Error, E::State, O>>,
1259 {
1260 ThenWithCtx {
1261 parser: self,
1262 then,
1263 phantom: EmptyPhantom::new(),
1264 }
1265 }
1266
1267 /// Run the previous contextual parser with the provided context.
1268 ///
1269 /// ```
1270 /// # use chumsky::prelude::*;
1271 /// # use chumsky::primitive::JustCfg;
1272 ///
1273 /// let generic = just(b'0').configure(|cfg, ctx: &u8| cfg.seq(*ctx));
1274 ///
1275 /// let parse_a = just::<_, _, extra::Default>(b'b').ignore_then(generic.with_ctx(b'a'));
1276 /// let parse_b = just::<_, _, extra::Default>(b'a').ignore_then(generic.with_ctx(b'b'));
1277 ///
1278 /// assert_eq!(parse_a.parse(b"ba" as &[_]).into_result(), Ok::<_, Vec<EmptyErr>>(b'a'));
1279 /// assert!(parse_a.parse(b"bb").has_errors());
1280 /// assert_eq!(parse_b.parse(b"ab" as &[_]).into_result(), Ok(b'b'));
1281 /// assert!(parse_b.parse(b"aa").has_errors());
1282 /// ```
1283 fn with_ctx(self, ctx: E::Context) -> WithCtx<Self, E::Context>
1284 where
1285 Self: Sized,
1286 E::Context: Clone,
1287 {
1288 WithCtx { parser: self, ctx }
1289 }
1290
1291 /// Runs the previous parser with the provided state.
1292 ///
1293 /// This is very uncommonly used and exists mostly for completeness.
1294 ///
1295 /// One possible use-case is 'glueing' together parsers declared in different places with incompatible state types.
1296 ///
1297 /// Note that the state value will be cloned and dropping *during* parsing, so it is recommended to ensure that
1298 /// this is a relatively performant operation.
1299 fn with_state<State>(self, state: State) -> WithState<Self, State>
1300 where
1301 Self: Sized,
1302 State: 'src + Clone,
1303 {
1304 WithState {
1305 parser: self,
1306 state,
1307 }
1308 }
1309
1310 /// Applies both parsers to the same position in the input, succeeding
1311 /// only if both succeed. The returned value will be that of the first parser,
1312 /// and the input will be at the end of the first parser if `and_is` succeeds.
1313 ///
1314 /// The second parser is allowed to consume more or less input than the first parser,
1315 /// but like its output, how much it consumes won't affect the final result.
1316 ///
1317 /// The motivating use-case is in combination with [`Parser::not`], allowing a parser
1318 /// to consume something only if it isn't also something like an escape sequence or a nested block.
1319 ///
1320 /// # Examples
1321 ///
1322 /// ```
1323 /// # use chumsky::{prelude::*, error::Simple};
1324 ///
1325 /// let escape = just("\\n").to('\n');
1326 ///
1327 /// // C-style string literal
1328 /// let string = none_of::<_, _, extra::Err<Simple<char>>>('"')
1329 /// .and_is(escape.not())
1330 /// .or(escape)
1331 /// .repeated()
1332 /// .collect::<String>()
1333 /// .padded_by(just('"'));
1334 ///
1335 /// assert_eq!(
1336 /// string.parse("\"wxyz\"").into_result().as_deref(),
1337 /// Ok("wxyz"),
1338 /// );
1339 /// assert_eq!(
1340 /// string.parse("\"a\nb\"").into_result().as_deref(),
1341 /// Ok("a\nb"),
1342 /// );
1343 /// ```
1344 fn and_is<U, B>(self, other: B) -> AndIs<Self, B, U>
1345 where
1346 Self: Sized,
1347 B: Parser<'src, I, U, E>,
1348 {
1349 AndIs {
1350 parser_a: self,
1351 parser_b: other,
1352 phantom: EmptyPhantom::new(),
1353 }
1354 }
1355
1356 /// Parse the pattern surrounded by the given delimiters.
1357 ///
1358 /// The output type of this parser is `O`, the same as the original parser.
1359 ///
1360 /// # Examples
1361 ///
1362 /// ```
1363 /// # use chumsky::{prelude::*, error::Simple};
1364 /// // A LISP-style S-expression
1365 /// #[derive(Debug, PartialEq)]
1366 /// enum SExpr {
1367 /// Ident(String),
1368 /// Num(u64),
1369 /// List(Vec<SExpr>),
1370 /// }
1371 ///
1372 /// let ident = any::<_, extra::Err<Simple<char>>>().filter(|c: &char| c.is_alphabetic())
1373 /// .repeated()
1374 /// .at_least(1)
1375 /// .collect::<String>();
1376 ///
1377 /// let num = text::int(10)
1378 /// .from_str()
1379 /// .unwrapped();
1380 ///
1381 /// let s_expr = recursive(|s_expr| s_expr
1382 /// .padded()
1383 /// .repeated()
1384 /// .collect::<Vec<_>>()
1385 /// .map(SExpr::List)
1386 /// .delimited_by(just('('), just(')'))
1387 /// .or(ident.map(SExpr::Ident))
1388 /// .or(num.map(SExpr::Num)));
1389 ///
1390 /// // A valid input
1391 /// assert_eq!(
1392 /// s_expr.parse("(add (mul 42 3) 15)").into_result(),
1393 /// Ok(SExpr::List(vec![
1394 /// SExpr::Ident("add".to_string()),
1395 /// SExpr::List(vec![
1396 /// SExpr::Ident("mul".to_string()),
1397 /// SExpr::Num(42),
1398 /// SExpr::Num(3),
1399 /// ]),
1400 /// SExpr::Num(15),
1401 /// ])),
1402 /// );
1403 /// ```
1404 fn delimited_by<U, V, B, C>(self, start: B, end: C) -> DelimitedBy<Self, B, C, U, V>
1405 where
1406 Self: Sized,
1407 B: Parser<'src, I, U, E>,
1408 C: Parser<'src, I, V, E>,
1409 {
1410 DelimitedBy {
1411 parser: self,
1412 start,
1413 end,
1414 phantom: EmptyPhantom::new(),
1415 }
1416 }
1417
1418 /// Parse a pattern, but with an instance of another pattern on either end, yielding the output of the inner.
1419 ///
1420 /// The output type of this parser is `O`, the same as the original parser.
1421 ///
1422 /// # Examples
1423 ///
1424 /// ```
1425 /// # use chumsky::{prelude::*, error::Simple};
1426 /// let ident = text::ascii::ident::<_, extra::Err<Simple<char>>>()
1427 /// .padded_by(just('!'));
1428 ///
1429 /// assert_eq!(ident.parse("!hello!").into_result(), Ok("hello"));
1430 /// assert!(ident.parse("hello!").has_errors());
1431 /// assert!(ident.parse("!hello").has_errors());
1432 /// assert!(ident.parse("hello").has_errors());
1433 /// ```
1434 fn padded_by<U, B>(self, padding: B) -> PaddedBy<Self, B, U>
1435 where
1436 Self: Sized,
1437 B: Parser<'src, I, U, E>,
1438 {
1439 PaddedBy {
1440 parser: self,
1441 padding,
1442 phantom: EmptyPhantom::new(),
1443 }
1444 }
1445
1446 /// Parse one thing or, on failure, another thing.
1447 ///
1448 /// The output of both parsers must be of the same type, because either output can be produced.
1449 ///
1450 /// If both parser succeed, the output of the first parser is guaranteed to be prioritized over the output of the
1451 /// second.
1452 ///
1453 /// If both parsers produce errors, the combinator will attempt to select from or combine the errors to produce an
1454 /// error that is most likely to be useful to a human attempting to understand the problem. The exact algorithm
1455 /// used is left unspecified, and is not part of the crate's semver guarantees, although regressions in error
1456 /// quality should be reported in the issue tracker of the main repository.
1457 ///
1458 /// Please note that long chains of [`Parser::or`] combinators have been known to result in poor compilation times.
1459 /// If you feel you are experiencing this, consider using [`choice`] instead.
1460 ///
1461 /// The output type of this parser is `O`, the output of both parsers.
1462 ///
1463 /// # Examples
1464 ///
1465 /// ```
1466 /// # use chumsky::{prelude::*, error::Simple};
1467 /// let op = just::<_, _, extra::Err<Simple<char>>>('+')
1468 /// .or(just('-'))
1469 /// .or(just('*'))
1470 /// .or(just('/'));
1471 ///
1472 /// assert_eq!(op.parse("+").into_result(), Ok('+'));
1473 /// assert_eq!(op.parse("/").into_result(), Ok('/'));
1474 /// assert!(op.parse("!").has_errors());
1475 /// ```
1476 fn or<B>(self, other: B) -> Or<Self, B>
1477 where
1478 Self: Sized,
1479 B: Parser<'src, I, O, E>,
1480 {
1481 Or {
1482 choice: choice((self, other)),
1483 }
1484 }
1485
1486 /// Attempt to parse something, but only if it exists.
1487 ///
1488 /// If parsing of the pattern is successful, the output is `Some(_)`. Otherwise, the output is `None`.
1489 ///
1490 /// The output type of this parser is `Option<O>`.
1491 ///
1492 /// # Examples
1493 ///
1494 /// ```
1495 /// # use chumsky::{prelude::*, error::Simple};
1496 /// let word = any::<_, extra::Err<Simple<char>>>().filter(|c: &char| c.is_alphabetic())
1497 /// .repeated()
1498 /// .at_least(1)
1499 /// .collect::<String>();
1500 ///
1501 /// let word_or_question = word
1502 /// .then(just('?').or_not());
1503 ///
1504 /// assert_eq!(word_or_question.parse("hello?").into_result(), Ok(("hello".to_string(), Some('?'))));
1505 /// assert_eq!(word_or_question.parse("wednesday").into_result(), Ok(("wednesday".to_string(), None)));
1506 /// ```
1507 fn or_not(self) -> OrNot<Self>
1508 where
1509 Self: Sized,
1510 {
1511 OrNot { parser: self }
1512 }
1513
1514 /// Invert the result of the contained parser, failing if it succeeds and succeeding if it fails.
1515 /// The output of this parser is always `()`, the unit type.
1516 ///
1517 /// The motivating case for this is in combination with [`Parser::and_is`], allowing a parser
1518 /// to consume something only if it isn't also something like an escape sequence or a nested block.
1519 ///
1520 /// Caveats:
1521 /// - The error message produced by `not` by default will likely be fairly unhelpful - it can
1522 /// only tell the span that was wrong.
1523 /// - If not careful, it's fairly easy to create non-intuitive behavior due to end-of-input
1524 /// being a valid token for a parser to consume, and as most parsers fail at end of input,
1525 /// `not` will succeed on it.
1526 ///
1527 /// ```
1528 /// # use chumsky::{prelude::*, error::Simple};
1529 ///
1530 /// #[derive(Debug, PartialEq)]
1531 /// enum Tree<'src> {
1532 /// Text(&'src str),
1533 /// Group(Vec<Self>),
1534 /// }
1535 ///
1536 /// // Arbitrary text, nested in a tree with { ... } delimiters
1537 /// let tree = recursive::<_, _, extra::Err<Simple<char>>, _, _>(|tree| {
1538 /// let text = any()
1539 /// .and_is(one_of("{}").not())
1540 /// .repeated()
1541 /// .at_least(1)
1542 /// .to_slice()
1543 /// .map(Tree::Text);
1544 ///
1545 /// let group = tree
1546 /// .repeated()
1547 /// .collect()
1548 /// .delimited_by(just('{'), just('}'))
1549 /// .map(Tree::Group);
1550 ///
1551 /// text.or(group)
1552 /// });
1553 ///
1554 /// assert_eq!(
1555 /// tree.parse("{abcd{efg{hijk}lmn{opq}rs}tuvwxyz}").into_result(),
1556 /// Ok(Tree::Group(vec![
1557 /// Tree::Text("abcd"),
1558 /// Tree::Group(vec![
1559 /// Tree::Text("efg"),
1560 /// Tree::Group(vec![
1561 /// Tree::Text("hijk"),
1562 /// ]),
1563 /// Tree::Text("lmn"),
1564 /// Tree::Group(vec![
1565 /// Tree::Text("opq"),
1566 /// ]),
1567 /// Tree::Text("rs"),
1568 /// ]),
1569 /// Tree::Text("tuvwxyz"),
1570 /// ])),
1571 /// );
1572 /// ```
1573 fn not(self) -> Not<Self, O>
1574 where
1575 Self: Sized,
1576 {
1577 Not {
1578 parser: self,
1579 phantom: EmptyPhantom::new(),
1580 }
1581 }
1582
1583 /// Parse a pattern zero or more times (analog to Regex's `<PAT>*`).
1584 ///
1585 /// Input is eagerly parsed. Be aware that the parser will accept no occurrences of the pattern too. Consider using
1586 /// [`Repeated::at_least`] instead if you wish to parse a minimum number of elements.
1587 ///
1588 /// The output type of this parser is, by default, `()`. If you want to collect the items into a container
1589 /// (such as a [`Vec`]), use [`IterParser::collect`].
1590 ///
1591 /// # Examples
1592 ///
1593 /// ```
1594 /// # use chumsky::{prelude::*, error::Simple};
1595 /// let num = any::<_, extra::Err<Simple<char>>>()
1596 /// .filter(|c: &char| c.is_ascii_digit())
1597 /// .repeated()
1598 /// .at_least(1)
1599 /// .collect::<String>()
1600 /// .from_str()
1601 /// .unwrapped();
1602 ///
1603 /// let sum = num.clone()
1604 /// .foldl(just('+').ignore_then(num).repeated(), |a, b| a + b);
1605 ///
1606 /// assert_eq!(sum.parse("2+13+4+0+5").into_result(), Ok(24));
1607 /// ```
1608 #[cfg_attr(debug_assertions, track_caller)]
1609 fn repeated(self) -> Repeated<Self, O, I, E>
1610 where
1611 Self: Sized,
1612 {
1613 Repeated {
1614 parser: self,
1615 at_least: 0,
1616 at_most: !0,
1617 #[cfg(debug_assertions)]
1618 location: *Location::caller(),
1619 phantom: EmptyPhantom::new(),
1620 }
1621 }
1622
1623 /// Parse a pattern, separated by another, any number of times.
1624 ///
1625 /// You can use [`SeparatedBy::allow_leading`] or [`SeparatedBy::allow_trailing`] to allow leading or trailing
1626 /// separators.
1627 ///
1628 /// The output type of this parser is, by default, `()`. If you want to collect the items into a container
1629 /// (such as a [`Vec`]), use [`IterParser::collect`].
1630 ///
1631 /// # Examples
1632 ///
1633 /// ```
1634 /// # use chumsky::{prelude::*, error::Simple};
1635 /// let shopping = text::ascii::ident::<_, extra::Err<Simple<char>>>()
1636 /// .padded()
1637 /// .separated_by(just(','))
1638 /// .collect::<Vec<_>>();
1639 ///
1640 /// assert_eq!(shopping.parse("eggs").into_result(), Ok(vec!["eggs"]));
1641 /// assert_eq!(shopping.parse("eggs, flour, milk").into_result(), Ok(vec!["eggs", "flour", "milk"]));
1642 /// ```
1643 ///
1644 /// See [`SeparatedBy::allow_leading`] and [`SeparatedBy::allow_trailing`] for more examples.
1645 #[cfg_attr(debug_assertions, track_caller)]
1646 fn separated_by<U, B>(self, separator: B) -> SeparatedBy<Self, B, O, U, I, E>
1647 where
1648 Self: Sized,
1649 B: Parser<'src, I, U, E>,
1650 {
1651 SeparatedBy {
1652 parser: self,
1653 separator,
1654 at_least: 0,
1655 at_most: !0,
1656 allow_leading: false,
1657 allow_trailing: false,
1658 #[cfg(debug_assertions)]
1659 location: *Location::caller(),
1660 phantom: EmptyPhantom::new(),
1661 }
1662 }
1663
1664 /// Left-fold the output of the parser into a single value.
1665 ///
1666 /// The output of the original parser must be of type `(A, impl IntoIterator<Item = B>)`.
1667 ///
1668 /// The output type of this parser is `A`, the left-hand component of the original parser's output.
1669 ///
1670 /// # Examples
1671 ///
1672 /// ```
1673 /// # use chumsky::{prelude::*, error::Simple};
1674 /// let int = text::int::<_, extra::Err<Simple<char>>>(10)
1675 /// .from_str()
1676 /// .unwrapped();
1677 ///
1678 /// let sum = int
1679 /// .clone()
1680 /// .foldl(just('+').ignore_then(int).repeated(), |a, b| a + b);
1681 ///
1682 /// assert_eq!(sum.parse("1+12+3+9").into_result(), Ok(25));
1683 /// assert_eq!(sum.parse("6").into_result(), Ok(6));
1684 /// ```
1685 #[cfg_attr(debug_assertions, track_caller)]
1686 fn foldl<B, F, OB>(self, other: B, f: F) -> Foldl<F, Self, B, OB, E>
1687 where
1688 F: Fn(O, OB) -> O,
1689 B: IterParser<'src, I, OB, E>,
1690 Self: Sized,
1691 {
1692 Foldl {
1693 parser_a: self,
1694 parser_b: other,
1695 folder: f,
1696 phantom: EmptyPhantom::new(),
1697 }
1698 }
1699
1700 /// Left-fold the output of the parser into a single value, making use of the parser's state when doing so.
1701 ///
1702 /// The output of the original parser must be of type `(A, impl IntoIterator<Item = B>)`.
1703 ///
1704 /// The output type of this parser is `A`, the left-hand component of the original parser's output.
1705 ///
1706 /// # Examples
1707 ///
1708 /// ## General
1709 ///
1710 /// ```
1711 /// # use chumsky::{prelude::*, error::Simple, extra::SimpleState};
1712 /// let int = text::int::<_, extra::Full<Simple<char>, SimpleState<i32>, ()>>(10)
1713 /// .from_str()
1714 /// .unwrapped();
1715 ///
1716 /// let sum = int
1717 /// .clone()
1718 /// .foldl_with(just('+').ignore_then(int).repeated(), |a, b, e| (a + b) * **e.state());
1719 ///
1720 /// let mut multiplier = SimpleState(2i32);
1721 /// assert_eq!(sum.parse_with_state("1+12+3+9", &mut multiplier).into_result(), Ok(134));
1722 /// assert_eq!(sum.parse_with_state("6", &mut multiplier).into_result(), Ok(6));
1723 /// ```
1724 ///
1725 /// ## Interning / Arena Allocation
1726 ///
1727 /// This example assumes use of the `slotmap` crate for arena allocation.
1728 ///
1729 /// ```
1730 /// # use chumsky::prelude::*;
1731 /// use slotmap::{new_key_type, SlotMap};
1732 ///
1733 /// // Metadata type for node Ids for extra type safety
1734 /// new_key_type! {
1735 /// pub struct NodeId;
1736 /// }
1737 ///
1738 /// // AST nodes reference other nodes with `NodeId`s instead of containing boxed/owned values
1739 /// #[derive(Copy, Clone, Debug, PartialEq)]
1740 /// enum Expr {
1741 /// Int(i32),
1742 /// Add(NodeId, NodeId),
1743 /// }
1744 ///
1745 /// type NodeArena = SlotMap<NodeId, Expr>;
1746 ///
1747 /// // Now, define our parser
1748 /// let int = text::int::<&str, extra::Full<Simple<char>, extra::SimpleState<NodeArena>, ()>>(10)
1749 /// .padded()
1750 /// .map_with(|s, e|
1751 /// // Return the ID of the new integer node
1752 /// e.state().insert(Expr::Int(s.parse().unwrap()))
1753 /// );
1754 ///
1755 /// let sum = int.foldl_with(
1756 /// just('+').padded().ignore_then(int).repeated(),
1757 /// |a: NodeId, b: NodeId, e| {
1758 /// // Inserting an item into the arena returns its ID
1759 /// e.state().insert(Expr::Add(a, b))
1760 /// }
1761 /// );
1762 ///
1763 /// // Test our parser
1764 /// let mut arena = extra::SimpleState(NodeArena::default());
1765 /// let four_plus_eight = sum.parse_with_state("4 + 8", &mut arena).unwrap();
1766 /// if let Expr::Add(a, b) = arena[four_plus_eight] {
1767 /// assert_eq!(arena[a], Expr::Int(4));
1768 /// assert_eq!(arena[b], Expr::Int(8));
1769 /// } else {
1770 /// panic!("Not an Expr::Add");
1771 /// }
1772 /// ```
1773 #[cfg_attr(debug_assertions, track_caller)]
1774 fn foldl_with<B, F, OB>(self, other: B, f: F) -> FoldlWith<F, Self, B, OB, E>
1775 where
1776 F: Fn(O, OB, &mut MapExtra<'src, '_, I, E>) -> O,
1777 B: IterParser<'src, I, OB, E>,
1778 Self: Sized,
1779 {
1780 FoldlWith {
1781 parser_a: self,
1782 parser_b: other,
1783 folder: f,
1784 phantom: EmptyPhantom::new(),
1785 }
1786 }
1787
1788 /// Parse a pattern. Afterwards, the input stream will be rewound to its original state, as if parsing had not
1789 /// occurred.
1790 ///
1791 /// This combinator is useful for cases in which you wish to avoid a parser accidentally consuming too much input,
1792 /// causing later parsers to fail as a result. A typical use-case of this is that you want to parse something that
1793 /// is not followed by something else.
1794 ///
1795 /// The output type of this parser is `O`, the same as the original parser.
1796 ///
1797 /// # Examples
1798 ///
1799 /// ```
1800 /// # use chumsky::prelude::*;
1801 /// let just_numbers = text::digits::<_, extra::Err<Simple<char>>>(10)
1802 /// .to_slice()
1803 /// .padded()
1804 /// .then_ignore(none_of("+-*/").rewind())
1805 /// .separated_by(just(','))
1806 /// .collect::<Vec<_>>();
1807 /// // 3 is not parsed because it's followed by '+'.
1808 /// assert_eq!(just_numbers.lazy().parse("1, 2, 3 + 4").into_result(), Ok(vec!["1", "2"]));
1809 /// ```
1810 fn rewind(self) -> Rewind<Self>
1811 where
1812 Self: Sized,
1813 {
1814 Rewind { parser: self }
1815 }
1816
1817 /// Make the parser lazy, such that it parses as much of the input as it can finishes successfully, leaving the trailing input untouched.
1818 ///
1819 /// The output type of this parser is `O`, the same as the original parser.
1820 ///
1821 /// # Examples
1822 ///
1823 /// ```
1824 /// # use chumsky::prelude::*;
1825 /// let digits = one_of::<_, _, extra::Err<Simple<char>>>('0'..='9')
1826 /// .repeated()
1827 /// .collect::<String>()
1828 /// .lazy();
1829 ///
1830 /// assert_eq!(digits.parse("12345abcde").into_result().as_deref(), Ok("12345"));
1831 /// ```
1832 fn lazy(self) -> Lazy<'src, Self, I, E>
1833 where
1834 Self: Sized,
1835 I: ValueInput<'src>,
1836 {
1837 self.then_ignore(any().repeated())
1838 }
1839
1840 /// Parse a pattern, ignoring any amount of whitespace both before and after the pattern.
1841 ///
1842 /// The output type of this parser is `O`, the same as the original parser.
1843 ///
1844 /// # Examples
1845 ///
1846 /// ```
1847 /// # use chumsky::prelude::*;
1848 /// let ident = text::ascii::ident::<_, extra::Err<Simple<char>>>().padded();
1849 ///
1850 /// // A pattern with no whitespace surrounding it is accepted
1851 /// assert_eq!(ident.parse("hello").into_result(), Ok("hello"));
1852 /// // A pattern with arbitrary whitespace surrounding it is also accepted
1853 /// assert_eq!(ident.parse(" \t \n \t world \t ").into_result(), Ok("world"));
1854 /// ```
1855 fn padded(self) -> Padded<Self>
1856 where
1857 Self: Sized,
1858 I: Input<'src>,
1859 I::Token: Char,
1860 {
1861 Padded { parser: self }
1862 }
1863
1864 // /// Flatten a nested collection.
1865 // ///
1866 // /// This use-cases of this method are broadly similar to those of [`Iterator::flatten`].
1867 // ///
1868 // /// The output type of this parser is `Vec<T>`, where the original parser output was
1869 // /// `impl IntoIterator<Item = impl IntoIterator<Item = T>>`.
1870 // fn flatten<T, Inner>(self) -> Map<Self, O, fn(O) -> Vec<T>>
1871 // where
1872 // Self: Sized,
1873 // O: IntoIterator<Item = Inner>,
1874 // Inner: IntoIterator<Item = T>,
1875 // {
1876 // self.map(|xs| xs.into_iter().flat_map(|xs| xs.into_iter()).collect())
1877 // }
1878
1879 /// Apply a fallback recovery strategy to this parser should it fail.
1880 ///
1881 /// There is no silver bullet for error recovery, so this function allows you to specify one of several different
1882 /// strategies at the location of your choice. Prefer an error recovery strategy that more precisely mirrors valid
1883 /// syntax where possible to make error recovery more reliable.
1884 ///
    /// Because chumsky is a [PEG](https://en.m.wikipedia.org/wiki/Parsing_expression_grammar) parser, which always
    /// takes the first successful parsing route through a grammar, recovering from an error may cause the parser to
1887 /// erroneously miss alternative valid routes through the grammar that do not generate recoverable errors. If you
1888 /// run into cases where valid syntax fails to parse without errors, this might be happening: consider removing
1889 /// error recovery or switching to a more specific error recovery strategy.
1890 ///
1891 /// The output type of this parser is `O`, the same as the original parser.
1892 ///
1893 /// # Examples
1894 ///
1895 /// ```
1896 /// # use chumsky::{prelude::*, error::Simple};
1897 /// #[derive(Debug, PartialEq)]
1898 /// enum Expr<'src> {
1899 /// Error,
1900 /// Int(&'src str),
1901 /// List(Vec<Expr<'src>>),
1902 /// }
1903 ///
1904 /// let recovery = just::<_, _, extra::Err<Simple<char>>>('[')
1905 /// .then(none_of(']').repeated().then(just(']')));
1906 ///
1907 /// let expr = recursive::<_, _, extra::Err<Simple<char>>, _, _>(|expr| expr
1908 /// .separated_by(just(','))
1909 /// .collect::<Vec<_>>()
1910 /// .delimited_by(just('['), just(']'))
1911 /// .map(Expr::List)
1912 /// // If parsing a list expression fails, recover at the next delimiter, generating an error AST node
1913 /// .recover_with(via_parser(recovery.map(|_| Expr::Error)))
1914 /// .or(text::int(10).map(Expr::Int))
1915 /// .padded());
1916 ///
1917 /// assert!(expr.parse("five").has_errors()); // Text is not a valid expression in this language...
1918 /// assert_eq!(
1919 /// expr.parse("[1, 2, 3]").into_result(),
1920 /// Ok(Expr::List(vec![Expr::Int("1"), Expr::Int("2"), Expr::Int("3")])),
1921 /// ); // ...but lists and numbers are!
1922 ///
1923 /// // This input has two syntax errors...
1924 /// let res = expr.parse("[[1, two], [3, four]]");
1925 /// // ...and error recovery allows us to catch both of them!
1926 /// assert_eq!(res.errors().len(), 2);
1927 /// // Additionally, the AST we get back still has useful information.
1928 /// assert_eq!(res.output(), Some(&Expr::List(vec![Expr::Error, Expr::Error])));
1929 /// ```
1930 fn recover_with<S: Strategy<'src, I, O, E>>(self, strategy: S) -> RecoverWith<Self, S>
1931 where
1932 Self: Sized,
1933 {
1934 RecoverWith {
1935 parser: self,
1936 strategy,
1937 }
1938 }
1939
1940 /// Map the primary error of this parser to another value.
1941 ///
1942 /// This function is most useful when using a custom error type, allowing you to augment errors according to
1943 /// context.
1944 ///
1945 /// The output type of this parser is `O`, the same as the original parser.
1946 // TODO: Map E -> D, not E -> E
1947 fn map_err<F>(self, f: F) -> MapErr<Self, F>
1948 where
1949 Self: Sized,
1950 F: Fn(E::Error) -> E::Error,
1951 {
1952 MapErr {
1953 parser: self,
1954 mapper: f,
1955 }
1956 }
1957
1958 // /// Map the primary error of this parser to another value, making use of the span from the start of the attempted
1959 // /// to the point at which the error was encountered.
1960 // ///
1961 // /// This function is useful for augmenting errors to allow them to display the span of the initial part of a
1962 // /// pattern, for example to add a "while parsing" clause to your error messages.
1963 // ///
1964 // /// The output type of this parser is `O`, the same as the original parser.
1965 // ///
1966 // // TODO: Map E -> D, not E -> E
1967 // fn map_err_with_span<F>(self, f: F) -> MapErrWithSpan<Self, F>
1968 // where
1969 // Self: Sized,
1970 // F: Fn(E::Error, I::Span) -> E::Error,
1971 // {
1972 // MapErrWithSpan {
1973 // parser: self,
1974 // mapper: f,
1975 // }
1976 // }
1977
1978 /// Map the primary error of this parser to another value, making use of the parser state.
1979 ///
1980 /// This function is useful for augmenting errors to allow them to include context in non context-free
1981 /// languages, or provide contextual notes on possible causes.
1982 ///
1983 /// The output type of this parser is `O`, the same as the original parser.
1984 ///
1985 // TODO: Map E -> D, not E -> E
1986 fn map_err_with_state<F>(self, f: F) -> MapErrWithState<Self, F>
1987 where
1988 Self: Sized,
1989 F: Fn(E::Error, I::Span, &mut E::State) -> E::Error,
1990 {
1991 MapErrWithState {
1992 parser: self,
1993 mapper: f,
1994 }
1995 }
1996
1997 /// Map the primary error of this parser to another value, making use of the parser state and
1998 /// context.
1999 ///
2000 /// This function is useful for augmenting errors to allow them to include context in non context-free
2001 /// languages, or provide contextual notes on possible causes.
2002 ///
2003 /// The output type of this parser is `O`, the same as the original parser.
2004 ///
2005 /// Note: Chumsky permits parsers to leave the input in an unspecified state after an error
2006 /// occurs and backtracking begins. As such, the output of [`MapExtra::span`] and
2007 /// [`MapExtra::slice`] is unspecified within this function.
2008 ///
2009 fn map_err_with<F>(self, f: F) -> MapErrWith<Self, F>
2010 where
2011 Self: Sized,
2012 F: Fn(E::Error, &mut MapExtra<'src, '_, I, E>) -> E::Error,
2013 {
2014 MapErrWith {
2015 parser: self,
2016 mapper: f,
2017 }
2018 }
2019
    /// Validate an output, producing non-terminal errors if it does not fulfill certain criteria.
    /// The errors will not immediately halt parsing on this path, but instead it will continue,
    /// potentially emitting one or more other errors, only failing after the pattern has otherwise
    /// parsed successfully, or emitted another terminal error.
2024 ///
2025 /// This function also permits mapping the output to a value of another type, similar to [`Parser::map`].
2026 ///
2027 /// If you wish parsing of this pattern to halt when an error is generated instead of continuing, consider using
2028 /// [`Parser::try_map`] instead.
2029 ///
2030 /// The output type of this parser is `U`, the result of the validation closure.
2031 ///
2032 /// # Examples
2033 ///
2034 /// ```
2035 /// # use chumsky::prelude::*;
2036 /// let large_int = text::int::<_, extra::Err<Rich<char>>>(10)
2037 /// .from_str()
2038 /// .unwrapped()
2039 /// .validate(|x: u32, e, emitter| {
2040 /// if x < 256 { emitter.emit(Rich::custom(e.span(), format!("{} must be 256 or higher.", x))) }
2041 /// x
2042 /// });
2043 ///
2044 /// assert_eq!(large_int.parse("537").into_result(), Ok(537));
2045 /// assert!(large_int.parse("243").into_result().is_err());
2046 /// ```
2047 ///
2048 /// To show the difference in behavior from [`Parser::try_map`]:
2049 ///
2050 /// ```
2051 /// # use chumsky::{text::TextExpected, util::MaybeRef, error::LabelError, prelude::*};
2052 ///
2053 /// // Start with the same large_int validator
2054 /// let large_int_val = text::int::<_, extra::Err<Rich<char>>>(10)
2055 /// .from_str()
2056 /// .unwrapped()
2057 /// .validate(|x: u32, e, emitter| {
2058 /// if x < 256 { emitter.emit(Rich::custom(e.span(), format!("{} must be 256 or higher", x))) }
2059 /// x
2060 /// });
2061 ///
2062 /// // A try_map version of the same parser
2063 /// let large_int_tm = text::int::<_, extra::Err<Rich<char>>>(10)
2064 /// .from_str()
2065 /// .unwrapped()
2066 /// .try_map(|x: u32, span| {
2067 /// if x < 256 {
2068 /// Err(Rich::custom(span, format!("{} must be 256 or higher", x)))
2069 /// } else {
2070 /// Ok(x)
2071 /// }
2072 /// });
2073 ///
2074 /// // Parser that uses the validation version
2075 /// let multi_step_val = large_int_val.then(text::ascii::ident().padded());
2076 /// // Parser that uses the try_map version
2077 /// let multi_step_tm = large_int_tm.then(text::ascii::ident().padded());
2078 ///
2079 /// // On success, both parsers are equivalent
2080 /// assert_eq!(
2081 /// multi_step_val.parse("512 foo").into_result(),
2082 /// Ok((512, "foo"))
2083 /// );
2084 ///
2085 /// assert_eq!(
2086 /// multi_step_tm.parse("512 foo").into_result(),
2087 /// Ok((512, "foo"))
2088 /// );
2089 ///
2090 /// // However, on failure, they may produce different errors:
2091 /// assert_eq!(
2092 /// multi_step_val.parse("100 2").into_result(),
2093 /// Err(vec![
2094 /// Rich::<char>::custom((0..3).into(), "100 must be 256 or higher"),
2095 /// <Rich<char> as LabelError<&str, _>>::expected_found([TextExpected::<&str>::AnyIdentifier], Some(MaybeRef::Val('2')), (4..5).into()),
2096 /// ])
2097 /// );
2098 ///
2099 /// assert_eq!(
2100 /// multi_step_tm.parse("100 2").into_result(),
2101 /// Err(vec![Rich::<char>::custom((0..3).into(), "100 must be 256 or higher")])
2102 /// );
2103 /// ```
2104 ///
2105 /// As is seen in the above example, validation doesn't prevent the emission of later errors in the
2106 /// same parser, but still produces an error in the output.
2107 ///
2108 fn validate<U, F>(self, f: F) -> Validate<Self, O, F>
2109 where
2110 Self: Sized,
2111 F: Fn(O, &mut MapExtra<'src, '_, I, E>, &mut Emitter<E::Error>) -> U,
2112 {
2113 Validate {
2114 parser: self,
2115 validator: f,
2116 phantom: EmptyPhantom::new(),
2117 }
2118 }
2119
2120 // /// Map the primary error of this parser to a result. If the result is [`Ok`], the parser succeeds with that value.
2121 // ///
2122 // /// Note that, if the closure returns [`Err`], the parser will not consume any input.
2123 // ///
2124 // /// The output type of this parser is `U`, the [`Ok`] type of the result.
2125 // fn or_else<F>(self, f: F) -> OrElse<Self, F>
2126 // where
2127 // Self: Sized,
2128 // F: Fn(E::Error) -> Result<O, E::Error>,
2129 // {
2130 // OrElse {
2131 // parser: self,
2132 // or_else: f,
2133 // }
2134 // }
2135
2136 /// Attempt to convert the output of this parser into something else using Rust's [`FromStr`] trait.
2137 ///
2138 /// This is most useful when wanting to convert literal values into their corresponding Rust type, such as when
2139 /// parsing integers.
2140 ///
2141 /// The output type of this parser is `Result<U, U::Err>`, the result of attempting to parse the output, `O`, into
2142 /// the value `U`.
2143 ///
2144 /// # Examples
2145 ///
2146 /// ```
2147 /// # use chumsky::prelude::*;
2148 /// let uint64 = text::int::<_, extra::Err<Simple<char>>>(10)
2149 /// .from_str::<u64>()
2150 /// .unwrapped();
2151 ///
2152 /// assert_eq!(uint64.parse("7").into_result(), Ok(7));
2153 /// assert_eq!(uint64.parse("42").into_result(), Ok(42));
2154 /// ```
2155 #[allow(clippy::wrong_self_convention)]
2156 fn from_str<U>(self) -> Map<Self, O, fn(O) -> Result<U, U::Err>>
2157 where
2158 Self: Sized,
2159 U: FromStr,
2160 O: AsRef<str>,
2161 {
2162 self.map(|o| o.as_ref().parse())
2163 }
2164
2165 /// For parsers that produce a [`Result`] as their output, unwrap the result (panicking if an [`Err`] is
2166 /// encountered).
2167 ///
2168 /// In general, this method should be avoided except in cases where all possibilities that the parser might produce can
2169 /// be parsed by using [`FromStr`] without producing an error.
2170 ///
2171 /// This combinator is not named `unwrap` to avoid confusion: it unwraps *during parsing*, not immediately.
2172 ///
2173 /// The output type of this parser is `U`, the [`Ok`] value of the [`Result`].
2174 ///
2175 /// # Examples
2176 ///
2177 /// ```
2178 /// # use chumsky::prelude::*;
2179 /// let boolean = just::<_, _, extra::Err<Simple<char>>>("true")
2180 /// .or(just("false"))
2181 /// .from_str::<bool>()
2182 /// .unwrapped(); // Cannot panic: the only possible outputs generated by the parser are "true" or "false"
2183 ///
2184 /// assert_eq!(boolean.parse("true").into_result(), Ok(true));
2185 /// assert_eq!(boolean.parse("false").into_result(), Ok(false));
2186 /// // Does not panic, because the original parser only accepts "true" or "false"
2187 /// assert!(boolean.parse("42").has_errors());
2188 /// ```
2189 #[track_caller]
2190 fn unwrapped(self) -> Unwrapped<Self, O>
2191 where
2192 Self: Sized,
2193 {
2194 Unwrapped {
2195 parser: self,
2196 #[cfg(debug_assertions)]
2197 location: *Location::caller(),
2198 phantom: EmptyPhantom::new(),
2199 }
2200 }
2201
2202 /// Turn this [`Parser`] into an [`IterParser`] if its output type implements [`IntoIterator`].
2203 ///
2204 /// The resulting iterable parser will emit each element of the output type in turn.
2205 ///
2206 /// This is *broadly* analogous to functions like [`Vec::into_iter`], but operating at the level of parser outputs.
2207 ///
2208 /// # Examples
2209 ///
2210 /// ```
2211 /// # use chumsky::prelude::*;
2212 /// // Parses whole integers
2213 /// let num = text::int::<&str, extra::Default>(10).padded().map(|x: &str| x.parse::<u64>().unwrap());
2214 /// // Parses a range like `0..4` into a vector like `[0, 1, 2, 3]`
2215 /// let range = num.then_ignore(just("..")).then(num)
2216 /// .map(|(x, y)| x..y)
2217 /// .into_iter()
2218 /// .collect::<Vec<u64>>();
2219 /// // Parses a list of numbers into a vector
2220 /// let list = num.separated_by(just(',')).collect::<Vec<u64>>();
2221 /// let set = range.or(list);
2222 /// assert_eq!(set.parse("0, 1, 2, 3").unwrap(), [0, 1, 2, 3]);
2223 /// assert_eq!(set.parse("0..4").unwrap(), [0, 1, 2, 3]);
2224 /// ```
2225 fn into_iter(self) -> IntoIter<Self, O>
2226 where
2227 Self: Sized,
2228 O: IntoIterator,
2229 {
2230 IntoIter {
2231 parser: self,
2232 phantom: EmptyPhantom::new(),
2233 }
2234 }
2235
2236 /// Box the parser, yielding a parser that performs parsing through dynamic dispatch.
2237 ///
2238 /// Boxing a parser might be useful for:
2239 ///
2240 /// - Dynamically building up parsers at run-time
2241 ///
2242 /// - Improving compilation times (Rust can struggle to compile code containing very long types)
2243 ///
2244 /// - Passing a parser over an FFI boundary
2245 ///
2246 /// - Getting around compiler implementation problems with long types such as
2247 /// [this](https://github.com/rust-lang/rust/issues/54540).
2248 ///
2249 /// - Places where you need to name the type of a parser
2250 ///
2251 /// Boxing a parser is broadly equivalent to boxing other combinators via dynamic dispatch, such as [`Iterator`].
2252 ///
2253 /// The output type of this parser is `O`, the same as the original parser.
2254 ///
2255 /// # Examples
2256 ///
2257 /// When not using `boxed`, the following patterns are either impossible or very difficult to express:
2258 ///
2259 /// ```compile_fail
2260 /// # use chumsky::prelude::*;
2261 ///
2262 /// pub trait Parseable: Sized {
2263 /// type Parser<'src>: Parser<'src, &'src str, Self>;
2264 ///
2265 /// fn parser<'src>() -> Self::Parser<'src>;
2266 /// }
2267 ///
2268 /// impl Parseable for i32 {
2269 /// // We *can* write this type, but it will be very impractical, and change on any alterations
2270 /// // to the implementation
2271 /// type Parser<'src> = ???;
2272 ///
2273 /// fn parser<'src>() -> Self::Parser<'src> {
2274 /// todo()
2275 /// }
2276 /// }
2277 /// ```
2278 ///
2279 /// ```compile_fail
2280 /// # use chumsky::prelude::*;
2281 /// # fn user_input<'src>() -> impl IntoIterator<Item = impl Parser<'src, &'src str, char>> { [just('b')] }
2282 ///
2283 /// let user_input = user_input();
2284 ///
2285 /// let mut parser = just('a');
2286 /// for i in user_input {
2287 /// // Doesn't work due to type mismatch - since every combinator creates a unique type
2288 /// parser = parser.or(i);
2289 /// }
2290 ///
2291 /// let parser = parser.then(just('z'));
2292 /// let _ = parser.parse("b").into_result();
2293 /// ```
2294 ///
2295 /// However, with `boxed`, we can express them by making the parsers all share a common type:
2296 ///
2297 /// ```
2298 /// use chumsky::prelude::*;
2299 ///
2300 /// pub trait Parseable: Sized {
2301 /// fn parser<'src>() -> Boxed<'src, 'src, &'src str, Self>;
2302 /// }
2303 ///
2304 /// impl Parseable for i32 {
2305 /// fn parser<'src>() -> Boxed<'src, 'src, &'src str, Self> {
2306 /// todo().boxed()
2307 /// }
2308 /// }
2309 /// ```
2310 ///
2311 /// ```
2312 /// # use chumsky::prelude::*;
2313 /// # fn user_input<'src>() -> impl IntoIterator<Item = impl Parser<'src, &'src str, char>> { [just('b'), just('c')] }
2314 /// let user_input = user_input();
2315 /// let mut parser = just('a').boxed();
2316 /// for i in user_input {
    ///     // Works: boxing gives the parser the same type on every iteration
2318 /// parser = parser.or(i).boxed();
2319 /// }
2320 /// let parser = parser.then(just('z'));
2321 /// parser.parse("az").into_result().unwrap();
2322 /// ```
2323 ///
2324 fn boxed<'b>(self) -> Boxed<'src, 'b, I, O, E>
2325 where
2326 Self: Sized + 'b,
2327 {
2328 Boxed {
2329 inner: Rc::new(self),
2330 }
2331 }
2332
2333 /// Simplify the type of the parser using Rust's `impl Trait` syntax.
2334 ///
2335 /// The only reason for using this function is to make Rust's compiler errors easier to debug: it does not change
2336 /// the behaviour of the parser at all, and is in fact just a simple identity function.
2337 #[cfg(feature = "nightly")]
2338 fn simplify(self) -> impl Parser<'src, I, O, E>
2339 where
2340 Self: Sized + 'src,
2341 {
2342 self
2343 }
2344
2345 /// Have this parser be enabled or disabled depending on context.
2346 ///
2347 /// This method, by itself, does nothing: you must use [`ConfigParser::configure`] to specify when the parser is
2348 /// enabled.
2349 ///
2350 /// # Example
2351 ///
2352 /// ```
2353 /// # use chumsky::prelude::*;
2354 ///
2355 /// // Our parser can be in two modes depending on context: hexadecimal, or denary
2356 /// #[derive(Clone)]
2357 /// enum Mode { Hex, Dec }
2358 ///
2359 /// let digits = one_of::<_, _, extra::Context<Mode>>("0123456789")
2360 /// .or(one_of("abcdef").contextual().configure(|cfg, ctx| matches!(ctx, Mode::Hex)))
2361 /// .repeated();
2362 ///
2363 /// let num = just::<_, _, extra::Default>("0x").ignore_then(digits.with_ctx(Mode::Hex))
2364 /// // Fallback: when '0x' isn't present, parse using denary mode
2365 /// .or(digits.with_ctx(Mode::Dec))
2366 /// .to_slice();
2367 ///
2368 /// assert_eq!(num.parse("0x1a3f5b").into_result(), Ok("0x1a3f5b"));
2369 /// assert_eq!(num.parse("12345").into_result(), Ok("12345"));
2370 /// // Without the '0x' prefix, hexadecimal digits are invalid
2371 /// assert!(num.parse("1a3f5b").has_errors());
2372 /// ```
2373 fn contextual(self) -> Contextual<Self>
2374 where
2375 Self: Sized,
2376 {
2377 Contextual { inner: self }
2378 }
2379
2380 /// Use [Pratt parsing](https://en.wikipedia.org/wiki/Operator-precedence_parser#Pratt_parsing) to ergonomically
    /// parse this pattern separated by prefix, postfix, and infix operators of various associativities and precedence.
2382 ///
2383 /// Pratt parsing is a powerful technique and is recommended when writing parsers for expressions.
2384 ///
2385 /// # Example
2386 ///
2387 /// See the documentation in [`pratt`] for more extensive examples and details.
2388 ///
2389 /// ```
2390 /// # use chumsky::prelude::*;
2391 /// use chumsky::pratt::*;
2392 ///
2393 /// let int = text::int::<_, extra::Err<Rich<char>>>(10)
2394 /// .from_str()
2395 /// .unwrapped()
2396 /// .padded();
2397 ///
2398 /// let op = |c| just(c).padded();
2399 ///
2400 /// let expr = int.pratt((
2401 /// prefix(2, op('-'), |_, x: i64, _| -x),
2402 /// infix(left(1), op('*'), |x, _, y, _| x * y),
2403 /// infix(left(1), op('/'), |x, _, y, _| x / y),
2404 /// infix(left(0), op('+'), |x, _, y, _| x + y),
2405 /// infix(left(0), op('-'), |x, _, y, _| x - y),
2406 /// ));
2407 ///
2408 /// // Pratt parsing can handle unary operators...
2409 /// assert_eq!(expr.parse("-7").into_result(), Ok(-7));
2410 /// // ...and infix binary operators...
2411 /// assert_eq!(expr.parse("6 + 3").into_result(), Ok(9));
2412 /// // ...and arbitrary precedence levels between them.
2413 /// assert_eq!(expr.parse("2 + 3 * -4").into_result(), Ok(-10));
2414 /// ```
2415 #[cfg(feature = "pratt")]
2416 fn pratt<Ops>(self, ops: Ops) -> pratt::Pratt<Self, Ops>
2417 where
2418 Self: Sized,
2419 {
2420 pratt::Pratt { atom: self, ops }
2421 }
2422}
2423
#[cfg(feature = "nightly")]
impl<'src, I: Input<'src>, O, E: ParserExtra<'src, I>> Parser<'src, I, O, E> for ! {
    fn go<M: Mode>(&self, _inp: &mut InputRef<'src, '_, I, E>) -> PResult<M, O> {
        // `!` has no values, so an empty match on it satisfies any return type.
        match *self {}
    }

    go_extra!(O);
}
2436
2437/// A [`Parser`] that can be configured with runtime context.
2438///
2439/// This allows for context-sensitive parsing
2440/// of input. Note that chumsky only supports 'left'-sensitive parsing, where the context for a parser
2441/// is derived from earlier in the input.
2442///
2443/// Chumsky distinguishes 'state' from 'context'. State is not able to change what input a parser
2444/// accepts, but may be used to change the contents of the type it emits. In this way state is expected
2445/// to be idempotent - combinators such as [`Parser::map_with`] are allowed to not call the
2446/// provided closure at all if they don't emit any output. Context and configuration, on the other hand,
/// are used to change what kind of input a parser may accept, and thus must always be evaluated. Context
2448/// isn't usable in any map combinator however - while it may affect accepted input, it is not expected
2449/// to change the final result outside of how it changes what the parser itself returns.
2450///
2451/// Not all parsers currently support configuration. If you feel like you need a parser to be configurable
2452/// and it isn't currently, please open an issue on the issue tracker of the main repository.
2453pub trait ConfigParser<'src, I, O, E>: Parser<'src, I, O, E>
2454where
2455 I: Input<'src>,
2456 E: ParserExtra<'src, I>,
2457{
2458 /// A type describing the configurable aspects of the parser.
2459 type Config: Default;
2460
2461 #[doc(hidden)]
2462 fn go_cfg<M: Mode>(
2463 &self,
2464 inp: &mut InputRef<'src, '_, I, E>,
2465 cfg: Self::Config,
2466 ) -> PResult<M, O>;
2467
2468 #[doc(hidden)]
2469 #[inline(always)]
2470 fn go_emit_cfg(
2471 &self,
2472 inp: &mut InputRef<'src, '_, I, E>,
2473 cfg: Self::Config,
2474 ) -> PResult<Emit, O> {
2475 self.go_cfg::<Emit>(inp, cfg)
2476 }
2477 #[doc(hidden)]
2478 #[inline(always)]
2479 fn go_check_cfg(
2480 &self,
2481 inp: &mut InputRef<'src, '_, I, E>,
2482 cfg: Self::Config,
2483 ) -> PResult<Check, O> {
2484 self.go_cfg::<Check>(inp, cfg)
2485 }
2486
2487 /// A combinator that allows configuration of the parser from the current context. Context
2488 /// is most often derived from [`Parser::ignore_with_ctx`], [`Parser::then_with_ctx`] or [`map_ctx`],
2489 /// and is how chumsky supports parsing things such as indentation-sensitive grammars.
2490 ///
2491 /// # Examples
2492 ///
2493 /// ```
2494 /// # use chumsky::prelude::*;
2495 ///
2496 /// let int = text::int::<_, extra::Err<Rich<char>>>(10)
2497 /// .from_str()
2498 /// .unwrapped();
2499 ///
2500 /// // By default, accepts any number of items
2501 /// let item = text::ascii::ident()
2502 /// .padded()
2503 /// .repeated();
2504 ///
2505 /// // With configuration, we can declare an exact number of items based on a prefix length
2506 /// let len_prefixed_arr = int
2507 /// .ignore_with_ctx(item.configure(|repeat, ctx| repeat.exactly(*ctx)).collect::<Vec<_>>());
2508 ///
2509 /// assert_eq!(
2510 /// len_prefixed_arr.parse("2 foo bar").into_result(),
2511 /// Ok(vec!["foo", "bar"]),
2512 /// );
2513 ///
2514 /// assert_eq!(
2515 /// len_prefixed_arr.parse("0").into_result(),
2516 /// Ok(vec![]),
2517 /// );
2518 ///
2519 /// len_prefixed_arr.parse("3 foo bar baz bam").into_result().unwrap_err();
2520 /// len_prefixed_arr.parse("3 foo bar").into_result().unwrap_err();
2521 /// ```
2522 fn configure<F>(self, cfg: F) -> Configure<Self, F>
2523 where
2524 Self: Sized,
2525 F: Fn(Self::Config, &E::Context) -> Self::Config,
2526 {
2527 Configure { parser: self, cfg }
2528 }
2529}
2530
/// Debugging data handed to `IterParser` implementations.
///
/// The flag is only stored when `debug_assertions` are enabled; otherwise this struct has no fields.
#[derive(Clone, Copy)]
pub struct IterParserDebug {
    #[cfg(debug_assertions)]
    pub(crate) nonconsumption_is_ok: bool,
}

impl IterParserDebug {
    /// Build the debug data. Without `debug_assertions` the argument is unused, hence the `allow`.
    #[inline(always)]
    pub(crate) fn new(#[allow(unused_variables)] allow_nonconsumption: bool) -> Self {
        Self {
            #[cfg(debug_assertions)]
            nonconsumption_is_ok: allow_nonconsumption,
        }
    }
}
2547
/// An [`Iterator`] built around an iterable parser. See [`IterParser::parse_iter`].
#[cfg(feature = "unstable")]
pub struct ParseIter<'a, 'src, 'iter, P, I, O, E>
where
    P: IterParser<'src, I, O, E>,
    I: Input<'src>,
    E: ParserExtra<'src, I>,
{
    // The iterable parser being driven.
    parser: &'a mut P,
    // Owned input state from which a fresh `InputRef` is taken on every step.
    own: InputOwn<'src, 'iter, I, E>,
    // The parser's iteration state; `None` until the first step creates it.
    iter_state: Option<P::IterState<Emit>>,
    #[allow(dead_code)]
    phantom: EmptyPhantom<(&'src (), O)>,
}
2565
#[cfg(feature = "unstable")]
impl<'a, 'src, P, I, O, E> Iterator for ParseIter<'a, 'src, '_, P, I, O, E>
where
    P: IterParser<'src, I, O, E>,
    I: Input<'src>,
    E: ParserExtra<'src, I>,
{
    type Item = O;

    /// Run one step of the wrapped iterable parser.
    ///
    /// Returns `None` if creating the iteration state fails, if the step reports an error, or if the step
    /// yields no item.
    fn next(&mut self) -> Option<Self::Item> {
        let mut inp = self.own.as_ref_start();
        let parser = &self.parser;

        // Create the iteration state the first time through; bail out with `None` if that fails.
        if self.iter_state.is_none() {
            let fresh = parser.make_iter::<Emit>(&mut inp).ok()?;
            self.iter_state = Some(fresh);
        }
        // Cannot panic: the state was populated just above if it was missing.
        let iter_state = self.iter_state.as_mut().unwrap();

        let step = parser.next::<Emit>(&mut inp, iter_state, IterParserDebug::new(true));
        // TODO: Avoid clone
        // Write the cursor reached by this step back into the owned input state.
        self.own.start = inp.cursor().inner;
        step.ok().flatten()
    }
}
2593
2594/// An iterable equivalent of [`Parser`], i.e: a parser that generates a sequence of outputs.
2595pub trait IterParser<'src, I, O, E = extra::Default>
2596where
2597 I: Input<'src>,
2598 E: ParserExtra<'src, I>,
2599{
2600 #[doc(hidden)]
2601 type IterState<M: Mode>
2602 where
2603 I: 'src;
2604
2605 #[doc(hidden)]
2606 fn make_iter<M: Mode>(
2607 &self,
2608 inp: &mut InputRef<'src, '_, I, E>,
2609 ) -> PResult<Emit, Self::IterState<M>>;
2610 #[doc(hidden)]
2611 fn next<M: Mode>(
2612 &self,
2613 inp: &mut InputRef<'src, '_, I, E>,
2614 state: &mut Self::IterState<M>,
2615 debug: IterParserDebug,
2616 ) -> IPResult<M, O>;
2617
2618 #[doc(hidden)]
2619 #[cfg(feature = "debug")]
2620 fn node_info(&self, _scope: &mut debug::NodeScope) -> debug::NodeInfo {
2621 let ty = core::any::type_name::<Self>();
2622 debug::NodeInfo::Unknown(ty.split_once('<').map_or(ty, |(ty, _)| ty).to_string())
2623 }
2624
2625 /// Collect this iterable parser into a container that implements [`FromIterator`].
2626 ///
2627 /// This is commonly useful for collecting parsers that output many values into containers of various kinds:
2628 /// [`Vec`]s, [`String`]s, or even [`HashMap`]s. This method is analogous to [`Iterator::collect`].
2629 ///
2630 /// The output type of this iterable parser is `C`, the type being collected into.
2631 ///
2632 /// # Examples
2633 ///
2634 /// ```
2635 /// # use chumsky::{prelude::*, error::Simple};
2636 /// let word = any::<_, extra::Err<Simple<char>>>().filter(|c: &char| c.is_alphabetic()) // This parser produces an output of `char`
2637 /// .repeated() // This parser is iterable (i.e: implements `IterParser`)
2638 /// .collect::<String>(); // We collect the `char`s into a `String`
2639 ///
2640 /// assert_eq!(word.parse("hello").into_result(), Ok("hello".to_string()));
2641 /// ```
2642 #[cfg_attr(debug_assertions, track_caller)]
2643 fn collect<C: FromIterator<O>>(self) -> Collect<Self, O, C>
2644 where
2645 Self: Sized,
2646 {
2647 Collect {
2648 parser: self,
2649 phantom: EmptyPhantom::new(),
2650 }
2651 }
2652
2653 /// Collect this iterable parser into a [`ContainerExactly`].
2654 ///
2655 /// This is useful for situations where the number of items to consume is statically known.
2656 /// A common use-case is collecting into an array.
2657 ///
    /// The output type of this iterable parser is `C`, the type being collected into.
2659 ///
2660 /// # Examples
2661 ///
2662 /// ```
2663 /// # use chumsky::{prelude::*, error::Simple};
2664 /// let three_digit = any::<_, extra::Err<Simple<char>>>().filter(|c: &char| c.is_numeric())
2665 /// .repeated()
2666 /// .collect_exactly::<[_; 3]>();
2667 ///
2668 /// assert_eq!(three_digit.parse("123").into_result(), Ok(['1', '2', '3']));
2669 /// assert!(three_digit.parse("12").into_result().is_err());
2670 /// assert!(three_digit.parse("1234").into_result().is_err());
2671 /// ```
2672 fn collect_exactly<C: ContainerExactly<O>>(self) -> CollectExactly<Self, O, C>
2673 where
2674 Self: Sized,
2675 {
2676 CollectExactly {
2677 parser: self,
2678 phantom: EmptyPhantom::new(),
2679 }
2680 }
2681
2682 /// Collect this iterable parser into a [`usize`], outputting the number of elements that were parsed.
2683 ///
2684 /// This is sugar for [`.collect::<usize>()`](Self::collect).
2685 ///
2686 /// # Examples
2687 ///
2688 /// ```
2689 /// # use chumsky::prelude::*;
2690 ///
2691 /// // Counts how many chess squares are in the input.
2692 /// let squares = one_of::<_, _, extra::Err<Simple<char>>>('a'..='z').then(one_of('1'..='8')).padded().repeated().count();
2693 ///
2694 /// assert_eq!(squares.parse("a1 b2 c3").into_result(), Ok(3));
2695 /// assert_eq!(squares.parse("e5 e7 c6 c7 f6 d5 e6 d7 e4 c5 d6 c4 b6 f5").into_result(), Ok(14));
2696 /// assert_eq!(squares.parse("").into_result(), Ok(0));
2697 /// ```
2698 fn count(self) -> Count<Self, O>
2699 where
2700 Self: Sized,
2701 {
2702 Count {
2703 parser: self,
2704 phantom: EmptyPhantom::new(),
2705 }
2706 }
2707
2708 /// Enumerate outputs of this iterable parser.
2709 ///
2710 /// This function behaves in a similar way to [`Iterator::enumerate`].
2711 ///
2712 /// The output type of this iterable parser is `(usize, O)`.
2713 ///
2714 /// # Examples
2715 ///
2716 /// ```
2717 /// # use chumsky::{prelude::*, error::Simple};
2718 /// let word = text::ascii::ident::<_, extra::Err<Simple<char>>>()
2719 /// .padded()
2720 /// .repeated() // This parser is iterable (i.e: implements `IterParser`)
2721 /// .enumerate()
2722 /// .collect::<Vec<(usize, &str)>>();
2723 ///
2724 /// assert_eq!(word.parse("hello world").into_result(), Ok(vec![(0, "hello"), (1, "world")]));
2725 /// ```
2726 fn enumerate(self) -> Enumerate<Self, O>
2727 where
2728 Self: Sized,
2729 {
2730 Enumerate {
2731 parser: self,
2732 phantom: EmptyPhantom::new(),
2733 }
2734 }
2735
2736 /// Fold the output of the parser into the given accumulator.
2737 ///
2738 /// The output type of this iterable parser is `B`, the accumulator type.
2739 ///
2740 /// # Examples
2741 ///
2742 /// ```
2743 /// # use chumsky::{prelude::*, error::Simple};
2744 /// let int = text::int::<_, extra::Err<Simple<char>>>(10)
2745 /// .from_str::<u32>()
2746 /// .unwrapped();
2747 ///
2748 /// let sum = int
2749 /// .padded()
2750 /// .repeated()
2751 /// .fold(0, |sum, x| sum + x);
2752 ///
2753 /// assert_eq!(sum.parse("3 7 2").into_result(), Ok(12));
2754 /// assert_eq!(sum.parse("").into_result(), Ok(0));
2755 /// assert_eq!(sum.parse("42 1").into_result(), Ok(43));
2756 /// ```
2757 #[cfg_attr(debug_assertions, track_caller)]
2758 fn fold<B, F>(self, init: B, f: F) -> Fold<F, Self, B, O, E>
2759 where
2760 B: Clone,
2761 F: Fn(B, O) -> B,
2762 Self: Sized,
2763 {
2764 Fold {
2765 parser: self,
2766 init,
2767 folder: f,
2768 phantom: EmptyPhantom::new(),
2769 }
2770 }
2771
2772 /// Right-fold the output of the parser into a single value.
2773 ///
2774 /// The output type of this iterable parser is `B`, the right-hand component of the original parser's output.
2775 ///
2776 /// # Examples
2777 ///
2778 /// ```
2779 /// # use chumsky::{prelude::*, error::Simple};
2780 /// let int = text::int::<_, extra::Err<Simple<char>>>(10)
2781 /// .from_str()
2782 /// .unwrapped();
2783 ///
2784 /// let signed = just('+').to(1)
2785 /// .or(just('-').to(-1))
2786 /// .repeated()
2787 /// .foldr(int, |a, b| a * b);
2788 ///
2789 /// assert_eq!(signed.parse("3").into_result(), Ok(3));
2790 /// assert_eq!(signed.parse("-17").into_result(), Ok(-17));
2791 /// assert_eq!(signed.parse("--+-+-5").into_result(), Ok(5));
2792 /// ```
2793 #[cfg_attr(debug_assertions, track_caller)]
2794 fn foldr<B, F, OA>(self, other: B, f: F) -> Foldr<F, Self, B, O, E>
2795 where
2796 F: Fn(O, OA) -> OA,
2797 B: Parser<'src, I, OA, E>,
2798 Self: Sized,
2799 {
2800 Foldr {
2801 parser_a: self,
2802 parser_b: other,
2803 folder: f,
2804 phantom: EmptyPhantom::new(),
2805 }
2806 }
2807
2808 /// Right-fold the output of the parser into a single value, making use of the parser's state when doing so.
2809 ///
2810 /// The output type of this parser is `B`, the right-hand component of the original parser's output.
2811 ///
2812 /// # Examples
2813 ///
2814 /// ```
2815 /// # use chumsky::{prelude::*, error::Simple, extra::SimpleState};
2816 /// let int = text::int::<_, extra::Full<Simple<char>, SimpleState<i32>, ()>>(10)
2817 /// .from_str()
2818 /// .unwrapped();
2819 ///
2820 /// let signed = just('+').to(1)
2821 /// .or(just('-').to(-1))
2822 /// .repeated()
2823 /// .foldr_with(int, |a, b, e| {
2824 /// **e.state() += 1;
2825 /// a * b
2826 /// });
2827 ///
2828 /// // Test our parser
2829 /// let mut folds = SimpleState(0i32);
2830 /// assert_eq!(signed.parse_with_state("3", &mut folds).into_result(), Ok(3));
2831 /// assert_eq!(signed.parse_with_state("-17", &mut folds).into_result(), Ok(-17));
2832 /// assert_eq!(signed.parse_with_state("--+-+-5", &mut folds).into_result(), Ok(5));
2833 /// ```
2834 ///
2835 ///
2836 #[cfg_attr(debug_assertions, track_caller)]
2837 fn foldr_with<B, F, OA>(self, other: B, f: F) -> FoldrWith<F, Self, B, O, E>
2838 where
2839 F: Fn(O, OA, &mut MapExtra<'src, '_, I, E>) -> OA,
2840 B: Parser<'src, I, OA, E>,
2841 Self: Sized,
2842 {
2843 FoldrWith {
2844 parser_a: self,
2845 parser_b: other,
2846 folder: f,
2847 phantom: EmptyPhantom::new(),
2848 }
2849 }
2850
2851 /// TODO
2852 #[cfg(feature = "nightly")]
2853 fn flatten(self) -> Flatten<Self, O>
2854 where
2855 O: IntoIterator,
2856 Self: Sized,
2857 {
2858 Flatten {
2859 parser: self,
2860 phantom: EmptyPhantom::new(),
2861 }
2862 }
2863
2864 /// Parse the given input with this [`IterParser`].
2865 ///
2866 /// The provided closure gives access to an iterator, which may be used to iterate the parser's outputs. Once the closure has terminated, a [`ParseResult`] will be returned containing the output of the closure and any parse errors that were encountered during iteration.
2867 #[cfg(feature = "unstable")]
2868 fn parse_iter<F, R>(&mut self, input: I, f: F) -> ParseResult<R, E::Error>
2869 where
2870 Self: IterParser<'src, I, O, E> + Sized,
2871 I: Input<'src>,
2872 E::State: Default,
2873 E::Context: Default,
2874 F: FnOnce(&mut ParseIter<'_, 'src, '_, Self, I, O, E>) -> R,
2875 {
2876 self.parse_iter_with_state(input, &mut Default::default(), f)
2877 }
2878
2879 /// Parse the given input with this [`IterParser`], using the given state.
2880 ///
2881 /// See [`IterParser::parse_iter`] for more information.
2882 #[cfg(feature = "unstable")]
2883 fn parse_iter_with_state<F, R>(
2884 &mut self,
2885 input: I,
2886 state: &mut E::State,
2887 f: F,
2888 ) -> ParseResult<R, E::Error>
2889 where
2890 Self: IterParser<'src, I, O, E> + Sized,
2891 I: Input<'src>,
2892 E::Context: Default,
2893 F: FnOnce(&mut ParseIter<'_, 'src, '_, Self, I, O, E>) -> R,
2894 {
2895 let mut iter = ParseIter {
2896 parser: self,
2897 own: InputOwn::new_state(input, state),
2898 iter_state: None,
2899 phantom: EmptyPhantom::new(),
2900 };
2901 let out = f(&mut iter);
2902 let mut inp = iter.own.as_ref_start();
2903 let res = end().go::<Emit>(&mut inp);
2904 let alt = inp.take_alt().map(|alt| alt.err).unwrap_or_else(|| {
2905 let fake_span = inp.span_since(&inp.cursor());
2906 // TODO: Why is this needed?
2907 E::Error::expected_found([], None, fake_span)
2908 });
2909 let mut errs = iter.own.into_errs();
2910 if res.is_err() {
2911 errs.push(alt);
2912 }
2913
2914 ParseResult::new(Some(out), errs)
2915 }
2916}
2917
2918/// An iterable equivalent of [`ConfigParser`], i.e: a parser that generates a sequence of outputs and
2919/// can be configured at runtime.
2920pub trait ConfigIterParser<'src, I, O, E = extra::Default>: IterParser<'src, I, O, E>
2921where
2922 I: Input<'src>,
2923 E: ParserExtra<'src, I>,
2924{
2925 /// A trait describing the configurable aspects of the iterable parser.
2926 type Config: Default;
2927
2928 #[doc(hidden)]
2929 fn next_cfg<M: Mode>(
2930 &self,
2931 inp: &mut InputRef<'src, '_, I, E>,
2932 state: &mut Self::IterState<M>,
2933 cfg: &Self::Config,
2934 debug: IterParserDebug,
2935 ) -> IPResult<M, O>;
2936
2937 /// A combinator that allows configuration of the parser from the current context
2938 fn configure<F>(self, cfg: F) -> IterConfigure<Self, F, O>
2939 where
2940 Self: Sized,
2941 F: Fn(Self::Config, &E::Context) -> Self::Config,
2942 {
2943 IterConfigure {
2944 parser: self,
2945 cfg,
2946 phantom: EmptyPhantom::new(),
2947 }
2948 }
2949
2950 /// A combinator that allows fallible configuration of the parser from the current context -
2951 /// if an error is returned, parsing fails.
2952 fn try_configure<F>(self, cfg: F) -> TryIterConfigure<Self, F, O>
2953 where
2954 Self: Sized,
2955 F: Fn(Self::Config, &E::Context, I::Span) -> Result<Self::Config, E::Error>,
2956 {
2957 TryIterConfigure {
2958 parser: self,
2959 cfg,
2960 phantom: EmptyPhantom::new(),
2961 }
2962 }
2963}
2964
2965/// See [`Parser::boxed`].
2966///
2967/// Due to current implementation details, the inner value is not, in fact, a [`Box`], but is an [`Rc`] to facilitate
2968/// efficient cloning. This is likely to change in the future. Unlike [`Box`], [`Rc`] has no size guarantees: although
2969/// it is *currently* the same size as a raw pointer.
2970// TODO: Don't use an Rc (why?)
2971pub struct Boxed<'src, 'b, I: Input<'src>, O, E: ParserExtra<'src, I> = extra::Default> {
2972 inner: Rc<DynParser<'src, 'b, I, O, E>>,
2973}
2974
2975impl<'src, I: Input<'src>, O, E: ParserExtra<'src, I>> Clone for Boxed<'src, '_, I, O, E> {
2976 fn clone(&self) -> Self {
2977 Self {
2978 inner: self.inner.clone(),
2979 }
2980 }
2981}
2982
2983impl<'src, I, O, E> Parser<'src, I, O, E> for Boxed<'src, '_, I, O, E>
2984where
2985 I: Input<'src>,
2986 E: ParserExtra<'src, I>,
2987{
2988 #[doc(hidden)]
2989 #[cfg(feature = "debug")]
2990 fn node_info(&self, scope: &mut debug::NodeScope) -> debug::NodeInfo {
2991 self.inner.node_info(scope)
2992 }
2993
2994 #[inline]
2995 fn go<M: Mode>(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult<M, O> {
2996 M::invoke(&*self.inner, inp)
2997 }
2998
2999 fn boxed<'c>(self) -> Boxed<'src, 'c, I, O, E>
3000 where
3001 Self: Sized + 'c,
3002 {
3003 // Never double-box parsers
3004 self
3005 }
3006
3007 go_extra!(O);
3008}
3009
3010impl<'src, I, O, E, T> Parser<'src, I, O, E> for ::alloc::boxed::Box<T>
3011where
3012 I: Input<'src>,
3013 E: ParserExtra<'src, I>,
3014 T: Parser<'src, I, O, E>,
3015{
3016 #[inline]
3017 fn go<M: Mode>(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult<M, O>
3018 where
3019 Self: Sized,
3020 {
3021 T::go::<M>(self, inp)
3022 }
3023
3024 go_extra!(O);
3025}
3026
3027impl<'src, I, O, E, T> Parser<'src, I, O, E> for ::alloc::rc::Rc<T>
3028where
3029 I: Input<'src>,
3030 E: ParserExtra<'src, I>,
3031 T: Parser<'src, I, O, E>,
3032{
3033 #[inline]
3034 fn go<M: Mode>(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult<M, O>
3035 where
3036 Self: Sized,
3037 {
3038 T::go::<M>(self, inp)
3039 }
3040
3041 go_extra!(O);
3042}
3043
3044impl<'src, I, O, E, T> Parser<'src, I, O, E> for ::alloc::sync::Arc<T>
3045where
3046 I: Input<'src>,
3047 E: ParserExtra<'src, I>,
3048 T: Parser<'src, I, O, E>,
3049{
3050 #[inline]
3051 fn go<M: Mode>(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult<M, O>
3052 where
3053 Self: Sized,
3054 {
3055 T::go::<M>(self, inp)
3056 }
3057
3058 go_extra!(O);
3059}
3060
/// Create a parser that selects one or more input patterns and maps them to an output value.
///
/// This is most useful when turning the tokens of a previous compilation pass (such as lexing) into data that can be
/// used for parsing, although it can also be used more generally to select inputs and map them to outputs. Any
/// unmapped input patterns will become syntax errors, just as with [`Parser::filter`].
///
/// Internally, [`select!`] is very similar to a single-token [`Parser::filter`], and thinking of it as such might
/// make it less confusing.
///
/// `select!` requires that tokens implement [`Clone`] and that the input type implements [`ValueInput`]. If you're
/// trying to access tokens referentially (for the sake of nested parsing, or simply because you want to avoid
/// cloning the token), see [`select_ref!`].
///
/// # Examples
///
/// `select!` is syntactically similar to a `match` expression and has support for
/// [pattern guards](https://doc.rust-lang.org/reference/expressions/match-expr.html#match-guards):
///
/// ```
/// # use chumsky::{prelude::*, error::Simple};
/// #[derive(Clone)]
/// enum Token<'src> { Ident(&'src str) }
///
/// enum Expr<'src> { Local(&'src str), Null, True, False }
///
/// # let _: chumsky::primitive::Select<_, &[Token], Expr, extra::Default> =
/// select! {
///     Token::Ident(s) if s == "true" => Expr::True,
///     Token::Ident(s) if s == "false" => Expr::False,
///     Token::Ident(s) if s == "null" => Expr::Null,
///     Token::Ident(s) => Expr::Local(s),
/// }
/// # ;
/// ```
///
/// If you require access to the token's span or other metadata, you may add an argument after a pattern to gain
/// access to it (see the docs for [`Parser::map_with`] and [`MapExtra`]):
///
/// ```
/// # use chumsky::{prelude::*, error::Simple};
/// #[derive(Clone)]
/// enum Token<'src> { Num(f64), Str(&'src str) }
///
/// enum Expr<'src> { Num(f64), Str(&'src str) }
///
/// type Span = SimpleSpan<usize>;
///
/// impl<'src> Expr<'src> {
///     fn spanned(self, span: Span) -> (Self, Span) { (self, span) }
/// }
///
/// # let _: chumsky::primitive::Select<_, &[Token], (Expr, Span), extra::Default> =
/// select! {
///     Token::Num(x) = e => Expr::Num(x).spanned(e.span()),
///     Token::Str(s) = e => Expr::Str(s).spanned(e.span()),
/// }
/// # ;
/// ```
///
/// A complete example, in which `select!` turns some of the tokens of a token stream into syntax tree nodes:
///
/// ```
/// # use chumsky::{prelude::*, error::Simple};
/// // The type of our parser's input (tokens like this might be emitted by your compiler's lexer)
/// #[derive(Clone, Debug, PartialEq)]
/// enum Token {
///     Num(u64),
///     Bool(bool),
///     LParen,
///     RParen,
/// }
///
/// // The type of our parser's output, a syntax tree
/// #[derive(Debug, PartialEq)]
/// enum Ast {
///     Num(u64),
///     Bool(bool),
///     List(Vec<Ast>),
/// }
///
/// // Our parser converts a stream of input tokens into an AST
/// // `select!` is used to deconstruct some of the tokens and turn them into AST nodes
/// let ast = recursive::<_, _, extra::Err<Simple<Token>>, _, _>(|ast| {
///     let literal = select! {
///         Token::Num(x) => Ast::Num(x),
///         Token::Bool(x) => Ast::Bool(x),
///     };
///
///     literal.or(ast
///         .repeated()
///         .collect()
///         .delimited_by(just(Token::LParen), just(Token::RParen))
///         .map(Ast::List))
/// });
///
/// use Token::*;
/// assert_eq!(
///     ast.parse(&[LParen, Num(5), LParen, Bool(false), Num(42), RParen, RParen]).into_result(),
///     Ok(Ast::List(vec![
///         Ast::Num(5),
///         Ast::List(vec![
///             Ast::Bool(false),
///             Ast::Num(42),
///         ]),
///     ])),
/// );
/// ```
#[macro_export]
macro_rules! select {
    ($($(#[$meta:meta])? $pat:pat $(= $ext:ident)? $(if $cond:expr)? $(=> $value:expr)?),+ $(,)?) => ({
        $crate::primitive::select(
            move |token, info| match (token, info) {
                $($(#[$meta])? ($pat $(,$ext)?, ..) $(if $cond)? => ::core::option::Option::Some({ () $(;$value)? })),+,
                _ => ::core::option::Option::None,
            }
        )
    });
}
3177
/// A version of [`select!`] that selects on tokens by reference instead of by value.
///
/// This is useful if you want to extract elements from a token in a zero-copy manner.
///
/// See the docs for [`select!`] for more information.
///
/// Requires that the parser input implements [`BorrowInput`].
#[macro_export]
macro_rules! select_ref {
    ($($(#[$meta:meta])? $pat:pat $(= $ext:ident)? $(if $cond:expr)? $(=> $value:expr)?),+ $(,)?) => ({
        $crate::primitive::select_ref(
            move |token, info| match (token, info) {
                $($(#[$meta])? ($pat $(,$ext)?, ..) $(if $cond)? => ::core::option::Option::Some({ () $(;$value)? })),+,
                _ => ::core::option::Option::None,
            }
        )
    });
}
3196
3197#[cfg(test)]
3198mod tests {
3199 use crate::prelude::*;
3200
3201 #[test]
3202 fn zero_copy() {
3203 use crate::input::WithContext;
3204 use crate::prelude::*;
3205
3206 #[derive(PartialEq, Debug)]
3207 enum Token<'src> {
3208 Ident(&'src str),
3209 String(&'src str),
3210 }
3211
3212 type FileId = u32;
3213 type Span = SimpleSpan<usize, FileId>;
3214
3215 fn parser<'src>(
3216 ) -> impl Parser<'src, WithContext<Span, &'src str>, [(Span, Token<'src>); 6]> {
3217 let ident = any()
3218 .filter(|c: &char| c.is_alphanumeric())
3219 .repeated()
3220 .at_least(1)
3221 .to_slice()
3222 .map(Token::Ident);
3223
3224 let string = just('"')
3225 .then(any().filter(|c: &char| *c != '"').repeated())
3226 .then(just('"'))
3227 .to_slice()
3228 .map(Token::String);
3229
3230 ident
3231 .or(string)
3232 .map_with(|token, e| (e.span(), token))
3233 .padded()
3234 .repeated()
3235 .collect_exactly()
3236 }
3237
3238 assert_eq!(
3239 parser()
3240 .parse(r#"hello "world" these are "test" tokens"#.with_context(42))
3241 .into_result(),
3242 Ok([
3243 (Span::new(42, 0..5), Token::Ident("hello")),
3244 (Span::new(42, 6..13), Token::String("\"world\"")),
3245 (Span::new(42, 14..19), Token::Ident("these")),
3246 (Span::new(42, 20..23), Token::Ident("are")),
3247 (Span::new(42, 24..30), Token::String("\"test\"")),
3248 (Span::new(42, 31..37), Token::Ident("tokens")),
3249 ]),
3250 );
3251 }
3252
3253 #[test]
3254 fn zero_copy_map_span() {
3255 use crate::{
3256 input::{SliceInput, ValueInput},
3257 prelude::*,
3258 };
3259
3260 #[derive(PartialEq, Debug)]
3261 enum Token<'src> {
3262 Ident(&'src str),
3263 String(&'src str),
3264 }
3265
3266 type FileId<'src> = &'src str;
3267 type Span<'src> = SimpleSpan<usize, FileId<'src>>;
3268
3269 fn parser<'src, I>() -> impl Parser<'src, I, [(Span<'src>, Token<'src>); 6]>
3270 where
3271 I: ValueInput<'src, Token = char, Span = Span<'src>>
3272 + SliceInput<'src, Slice = &'src str>,
3273 {
3274 let ident = any()
3275 .filter(|c: &char| c.is_alphanumeric())
3276 .repeated()
3277 .at_least(1)
3278 .to_slice()
3279 .map(Token::Ident);
3280
3281 let string = just('"')
3282 .then(any().filter(|c: &char| *c != '"').repeated())
3283 .then(just('"'))
3284 .to_slice()
3285 .map(Token::String);
3286
3287 ident
3288 .or(string)
3289 .map_with(|token, e| (e.span(), token))
3290 .padded()
3291 .repeated()
3292 .collect_exactly()
3293 }
3294
3295 let filename = "file.txt".to_string();
3296 let fstr = filename.as_str();
3297
3298 assert_eq!(
3299 parser()
3300 .parse(
3301 r#"hello "world" these are "test" tokens"#
3302 .map_span(|span| Span::new(fstr, span.start()..span.end()))
3303 )
3304 .into_result(),
3305 Ok([
3306 (Span::new("file.txt", 0..5), Token::Ident("hello")),
3307 (Span::new("file.txt", 6..13), Token::String("\"world\"")),
3308 (Span::new("file.txt", 14..19), Token::Ident("these")),
3309 (Span::new("file.txt", 20..23), Token::Ident("are")),
3310 (Span::new("file.txt", 24..30), Token::String("\"test\"")),
3311 (Span::new("file.txt", 31..37), Token::Ident("tokens")),
3312 ]),
3313 );
3314 }
3315
3316 #[test]
3317 fn zero_copy_repetition() {
3318 use crate::prelude::*;
3319
3320 fn parser<'src>() -> impl Parser<'src, &'src str, Vec<u64>> {
3321 any()
3322 .filter(|c: &char| c.is_ascii_digit())
3323 .repeated()
3324 .at_least(1)
3325 .at_most(3)
3326 .to_slice()
3327 .map(|b: &str| b.parse::<u64>().unwrap())
3328 .padded()
3329 .separated_by(just(',').padded())
3330 .allow_trailing()
3331 .collect()
3332 .delimited_by(just('['), just(']'))
3333 }
3334
3335 assert_eq!(
3336 parser().parse("[122 , 23,43, 4, ]").into_result(),
3337 Ok(vec![122, 23, 43, 4]),
3338 );
3339 assert_eq!(
3340 parser().parse("[0, 3, 6, 900,120]").into_result(),
3341 Ok(vec![0, 3, 6, 900, 120]),
3342 );
3343 assert_eq!(
3344 parser().parse("[200,400,50 ,0,0, ]").into_result(),
3345 Ok(vec![200, 400, 50, 0, 0]),
3346 );
3347
3348 assert!(parser().parse("[1234,123,12,1]").has_errors());
3349 assert!(parser().parse("[,0, 1, 456]").has_errors());
3350 assert!(parser().parse("[3, 4, 5, 67 89,]").has_errors());
3351 }
3352
3353 #[test]
3354 fn zero_copy_group() {
3355 use crate::prelude::*;
3356
3357 fn parser<'src>() -> impl Parser<'src, &'src str, (&'src str, u64, char)> {
3358 group((
3359 any()
3360 .filter(|c: &char| c.is_ascii_alphabetic())
3361 .repeated()
3362 .at_least(1)
3363 .to_slice()
3364 .padded(),
3365 any()
3366 .filter(|c: &char| c.is_ascii_digit())
3367 .repeated()
3368 .at_least(1)
3369 .to_slice()
3370 .map(|s: &str| s.parse::<u64>().unwrap())
3371 .padded(),
3372 any().filter(|c: &char| !c.is_whitespace()).padded(),
3373 ))
3374 }
3375
3376 assert_eq!(
3377 parser().parse("abc 123 [").into_result(),
3378 Ok(("abc", 123, '[')),
3379 );
3380 assert_eq!(
3381 parser().parse("among3d").into_result(),
3382 Ok(("among", 3, 'd')),
3383 );
3384 assert_eq!(
3385 parser().parse("cba321,").into_result(),
3386 Ok(("cba", 321, ',')),
3387 );
3388
3389 assert!(parser().parse("abc 123 ").has_errors());
3390 assert!(parser().parse("123abc ]").has_errors());
3391 assert!(parser().parse("and one &").has_errors());
3392 }
3393
3394 #[test]
3395 fn zero_copy_group_array() {
3396 use crate::prelude::*;
3397
3398 fn parser<'src>() -> impl Parser<'src, &'src str, [char; 3]> {
3399 group([just('a'), just('b'), just('c')])
3400 }
3401
3402 assert_eq!(parser().parse("abc").into_result(), Ok(['a', 'b', 'c']));
3403 assert!(parser().parse("abd").has_errors());
3404 }
3405
3406 #[test]
3407 fn unicode_str() {
3408 let input = "🄯🄚🄐🝋🄂🬯🈦g🍩🕔🈳2🬙🨞🅢🭳🎅h🧿🏩k🠡🀔🤟📵🤿🝜🙘5🠻🠓";
3409 let mut own = crate::input::InputOwn::<_, extra::Default>::new(input);
3410 let mut inp = own.as_ref_start();
3411
3412 while let Some(_c) = inp.next() {}
3413 }
3414
3415 #[test]
3416 #[cfg(feature = "unstable")]
3417 fn iter() {
3418 use crate::prelude::*;
3419
3420 fn many_letters<'src>() -> impl IterParser<'src, &'src str, char> {
3421 any().filter(char::is_ascii_alphabetic).repeated()
3422 }
3423
3424 let res = many_letters().parse_iter("abcdef", |iter| iter.collect::<String>());
3425
3426 assert_eq!(res.into_result().unwrap(), "abcdef");
3427
3428 let res = many_letters().parse_iter("123456", |iter| iter.collect::<String>());
3429
3430 assert!(res.has_errors());
3431 }
3432
3433 #[test]
3434 #[cfg(feature = "memoization")]
3435 fn exponential() {
3436 use crate::prelude::*;
3437
3438 fn parser<'src>() -> impl Parser<'src, &'src str, String> {
3439 recursive(|expr| {
3440 let atom = any()
3441 .filter(|c: &char| c.is_alphabetic())
3442 .repeated()
3443 .at_least(1)
3444 .collect()
3445 .or(expr.delimited_by(just('('), just(')')));
3446
3447 atom.clone()
3448 .then_ignore(just('+'))
3449 .then(atom.clone())
3450 .map(|(a, b)| format!("{a}{b}"))
3451 .memoized()
3452 .or(atom)
3453 })
3454 .then_ignore(end())
3455 }
3456
3457 parser()
3458 .parse("((((((((((((((((((((((((((((((a+b))))))))))))))))))))))))))))))")
3459 .into_result()
3460 .unwrap();
3461 }
3462
3463 #[test]
3464 #[cfg(feature = "memoization")]
3465 fn left_recursive() {
3466 use crate::prelude::*;
3467
3468 fn parser<'src>() -> impl Parser<'src, &'src str, String> {
3469 recursive(|expr| {
3470 let atom = any()
3471 .filter(|c: &char| c.is_alphabetic())
3472 .repeated()
3473 .at_least(1)
3474 .collect();
3475
3476 let sum = expr
3477 .clone()
3478 .then_ignore(just('+'))
3479 .then(expr)
3480 .map(|(a, b)| format!("{a}{b}"))
3481 .memoized();
3482
3483 sum.or(atom)
3484 })
3485 .then_ignore(end())
3486 }
3487
3488 assert_eq!(parser().parse("a+b+c").into_result().unwrap(), "abc");
3489 }
3490
3491 #[cfg(debug_assertions)]
3492 mod debug_asserts {
3493 use crate::prelude::*;
3494
3495 // TODO panic when left recursive parser is detected
3496 // #[test]
3497 // #[should_panic]
3498 // fn debug_assert_left_recursive() {
3499 // recursive(|expr| {
3500 // let atom = any::<&str, extra::Default>()
3501 // .filter(|c: &char| c.is_alphabetic())
3502 // .repeated()
3503 // .at_least(1)
3504 // .collect();
3505
3506 // let sum = expr
3507 // .clone()
3508 // .then_ignore(just('+'))
3509 // .then(expr)
3510 // .map(|(a, b)| format!("{a}{b}"));
3511
3512 // sum.or(atom)
3513 // })
3514 // .then_ignore(end())
3515 // .parse("a+b+c");
3516 // }
3517
3518 #[test]
3519 #[should_panic]
3520 #[cfg(debug_assertions)]
3521 fn debug_assert_collect() {
3522 empty::<&str, extra::Default>()
3523 .repeated()
3524 .collect::<()>()
3525 .parse("a+b+c")
3526 .unwrap();
3527 }
3528
3529 #[test]
3530 #[should_panic]
3531 #[cfg(debug_assertions)]
3532 fn debug_assert_separated_by() {
3533 empty::<&str, extra::Default>()
3534 .to(())
3535 .separated_by(empty())
3536 .collect::<()>()
3537 .parse("a+b+c");
3538 }
3539
3540 #[test]
3541 fn debug_assert_separated_by2() {
3542 assert_eq!(
3543 empty::<&str, extra::Default>()
3544 .separated_by(just(','))
3545 .count()
3546 .parse(",")
3547 .unwrap(),
3548 2
3549 );
3550 }
3551
3552 #[test]
3553 #[should_panic]
3554 #[cfg(debug_assertions)]
3555 fn debug_assert_foldl() {
3556 assert_eq!(
3557 empty::<&str, extra::Default>()
3558 .to(1)
3559 .foldl(empty().repeated(), |n, ()| n + 1)
3560 .parse("a+b+c")
3561 .unwrap(),
3562 3
3563 );
3564 }
3565
3566 #[test]
3567 #[should_panic]
3568 #[cfg(debug_assertions)]
3569 fn debug_assert_foldl_with() {
3570 use extra::SimpleState;
3571
3572 let state = 100;
3573 empty::<&str, extra::Full<EmptyErr, SimpleState<i32>, ()>>()
3574 .foldl_with(empty().to(()).repeated(), |_, _, _| ())
3575 .parse_with_state("a+b+c", &mut state.into());
3576 }
3577
3578 #[test]
3579 #[should_panic]
3580 #[cfg(debug_assertions)]
3581 fn debug_assert_foldr() {
3582 empty::<&str, extra::Default>()
3583 .to(())
3584 .repeated()
3585 .foldr(empty(), |_, _| ())
3586 .parse("a+b+c");
3587 }
3588
3589 #[test]
3590 #[should_panic]
3591 #[cfg(debug_assertions)]
3592 fn debug_assert_foldr_with_state() {
3593 empty::<&str, extra::Default>()
3594 .to(())
3595 .repeated()
3596 .foldr_with(empty(), |_, _, _| ())
3597 .parse_with_state("a+b+c", &mut ());
3598 }
3599
3600 #[test]
3601 #[should_panic]
3602 #[cfg(debug_assertions)]
3603 fn debug_assert_repeated() {
3604 empty::<&str, extra::Default>()
3605 .to(())
3606 .repeated()
3607 .parse("a+b+c");
3608 }
3609
3610 // TODO what about IterConfigure and TryIterConfigure?
3611 }
3612
3613 #[test]
3614 #[should_panic]
3615 fn recursive_define_twice() {
3616 let mut expr = Recursive::declare();
3617 expr.define({
3618 let atom = any::<&str, extra::Default>()
3619 .filter(|c: &char| c.is_alphabetic())
3620 .repeated()
3621 .at_least(1)
3622 .collect();
3623 let sum = expr
3624 .clone()
3625 .then_ignore(just('+'))
3626 .then(expr.clone())
3627 .map(|(a, b)| format!("{a}{b}"));
3628
3629 sum.or(atom)
3630 });
3631 expr.define(expr.clone());
3632
3633 expr.then_ignore(end()).parse("a+b+c");
3634 }
3635
3636 #[test]
3637 #[should_panic]
3638 fn todo_err() {
3639 let expr = todo::<&str, String, extra::Default>();
3640 expr.then_ignore(end()).parse("a+b+c");
3641 }
3642
3643 #[test]
3644 fn box_impl() {
3645 fn parser<'src>() -> impl Parser<'src, &'src str, Vec<u64>> {
3646 Box::new(
3647 any()
3648 .filter(|c: &char| c.is_ascii_digit())
3649 .repeated()
3650 .at_least(1)
3651 .at_most(3)
3652 .to_slice()
3653 .map(|b: &str| b.parse::<u64>().unwrap())
3654 .padded()
3655 .separated_by(just(',').padded())
3656 .allow_trailing()
3657 .collect()
3658 .delimited_by(just('['), just(']')),
3659 )
3660 }
3661
3662 assert_eq!(
3663 parser().parse("[122 , 23,43, 4, ]").into_result(),
3664 Ok(vec![122, 23, 43, 4]),
3665 );
3666 assert_eq!(
3667 parser().parse("[0, 3, 6, 900,120]").into_result(),
3668 Ok(vec![0, 3, 6, 900, 120]),
3669 );
3670 assert_eq!(
3671 parser().parse("[200,400,50 ,0,0, ]").into_result(),
3672 Ok(vec![200, 400, 50, 0, 0]),
3673 );
3674 }
3675
3676 #[test]
3677 fn rc_impl() {
3678 use alloc::rc::Rc;
3679
3680 fn parser<'src>() -> impl Parser<'src, &'src str, Vec<u64>> {
3681 Rc::new(
3682 any()
3683 .filter(|c: &char| c.is_ascii_digit())
3684 .repeated()
3685 .at_least(1)
3686 .at_most(3)
3687 .to_slice()
3688 .map(|b: &str| b.parse::<u64>().unwrap())
3689 .padded()
3690 .separated_by(just(',').padded())
3691 .allow_trailing()
3692 .collect()
3693 .delimited_by(just('['), just(']')),
3694 )
3695 }
3696
3697 assert_eq!(
3698 parser().parse("[122 , 23,43, 4, ]").into_result(),
3699 Ok(vec![122, 23, 43, 4]),
3700 );
3701 assert_eq!(
3702 parser().parse("[0, 3, 6, 900,120]").into_result(),
3703 Ok(vec![0, 3, 6, 900, 120]),
3704 );
3705 assert_eq!(
3706 parser().parse("[200,400,50 ,0,0, ]").into_result(),
3707 Ok(vec![200, 400, 50, 0, 0]),
3708 );
3709 }
3710
3711 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
3712 struct MyErr(&'static str);
3713
3714 impl<'src, I: Input<'src>> crate::Error<'src, I> for MyErr {
3715 fn merge(self, other: Self) -> Self {
3716 if other == MyErr("special") {
3717 MyErr("special")
3718 } else {
3719 self
3720 }
3721 }
3722 }
3723
3724 impl<'src, I> crate::LabelError<'src, I, crate::DefaultExpected<'src, I::Token>> for MyErr
3725 where
3726 I: Input<'src>,
3727 {
3728 fn expected_found<E: IntoIterator<Item = crate::DefaultExpected<'src, I::Token>>>(
3729 _expected: E,
3730 _found: Option<crate::MaybeRef<'src, I::Token>>,
3731 _span: I::Span,
3732 ) -> Self {
3733 MyErr("expected found")
3734 }
3735 }
3736
3737 #[test]
3738 fn err_prio_0() {
3739 #[allow(dead_code)]
3740 fn always_err<'src>() -> impl Parser<'src, &'src str, (), extra::Err<MyErr>> {
3741 empty().try_map(|_, _| Err(MyErr("special")))
3742 }
3743
3744 assert_eq!(
3745 always_err().parse("test").into_result().unwrap_err(),
3746 vec![MyErr("special")]
3747 )
3748 }
3749
3750 #[test]
3751 fn err_prio_1() {
3752 #[allow(dead_code)]
3753 fn always_err_choice<'src>() -> impl Parser<'src, &'src str, (), extra::Err<MyErr>> {
3754 choice((just("something").ignored(), empty())).try_map(|_, _| Err(MyErr("special")))
3755 }
3756
3757 assert_eq!(
3758 always_err_choice().parse("test").into_result().unwrap_err(),
3759 vec![MyErr("special")]
3760 )
3761 }
3762
3763 #[test]
3764 fn into_iter_no_error() {
3765 fn parser<'src>() -> impl Parser<'src, &'src str, (), extra::Err<MyErr>> {
3766 let many_as = just('a')
3767 .ignored()
3768 .repeated()
3769 .at_least(1)
3770 .collect::<Vec<_>>();
3771
3772 many_as.into_iter().collect()
3773 }
3774
3775 assert_eq!(parser().parse("aaa").into_result(), Ok(()));
3776 }
3777
3778 #[cfg(feature = "nightly")]
3779 #[test]
3780 fn flatten() {
3781 fn parser<'src>() -> impl Parser<'src, &'src str, Vec<char>, extra::Err<MyErr>> {
3782 let many_as = just('a')
3783 .map(Some)
3784 .or(any().to(None))
3785 .repeated()
3786 .flatten()
3787 .collect::<Vec<_>>();
3788
3789 many_as.into_iter().collect()
3790 }
3791
3792 assert_eq!(
3793 parser().parse("abracadabra").into_result(),
3794 Ok(vec!['a', 'a', 'a', 'a', 'a'])
3795 );
3796 }
3797
3798 #[test]
3799 fn iterable_then() {
3800 fn parser<'src>() -> impl Parser<'src, &'src str, Vec<char>> {
3801 just('a')
3802 .map(Some)
3803 .into_iter()
3804 .then(just('b').repeated())
3805 .then(just('c').repeated())
3806 .collect()
3807 }
3808
3809 assert_eq!(
3810 parser().parse("abbcc").into_result(),
3811 Ok(vec!['a', 'b', 'b', 'c', 'c'])
3812 );
3813 assert_eq!(parser().parse("acc").into_result(), Ok(vec!['a', 'c', 'c']));
3814 assert!(parser().parse("bbc").has_errors());
3815 }
3816
3817 #[test]
3818 #[allow(dead_code)]
3819 fn map_with_compiles() {
3820 enum Token {}
3821 enum Expr {}
3822
3823 fn expr<'src, I>() -> impl Parser<'src, I, (Expr, SimpleSpan)> + 'src
3824 where
3825 I: Input<'src, Token = Token, Span = SimpleSpan> + 'src,
3826 {
3827 todo().map_with(|expr, e| (expr, e.span()))
3828 }
3829 }
3830
3831 #[test]
3832 fn label() {
3833 use crate::label::LabelError;
3834
3835 fn parser<'src>() -> impl Parser<'src, &'src str, (), extra::Err<Rich<'src, char>>> {
3836 just("hello").labelled("greeting").as_context().ignored()
3837 }
3838
3839 let mut err = <Rich<_> as crate::LabelError<&str, char>>::expected_found(
3840 ['h'],
3841 Some('b'.into()),
3842 (0..1).into(),
3843 );
3844 <Rich<_, _> as LabelError<&str, _>>::label_with(&mut err, "greeting");
3845 assert_eq!(parser().parse("bye").into_errors(), vec![err]);
3846
3847 let mut err = <Rich<_> as crate::LabelError<&str, char>>::expected_found(
3848 ['l'],
3849 Some('p'.into()),
3850 (3..4).into(),
3851 );
3852 <Rich<_, _> as LabelError<&str, _>>::in_context(&mut err, "greeting", (0..3).into());
3853 assert_eq!(parser().parse("help").into_errors(), vec![err]);
3854
3855 fn parser2<'src>() -> impl Parser<'src, &'src str, (), extra::Err<Rich<'src, char>>> {
3856 text::keyword("hello")
3857 .labelled("greeting")
3858 .as_context()
3859 .ignored()
3860 }
3861
3862 let mut err =
3863 <Rich<_> as crate::LabelError<&str, char>>::expected_found(['h'], None, (0..7).into());
3864 <Rich<_, _> as LabelError<&str, _>>::label_with(&mut err, "greeting");
3865 assert_eq!(parser2().parse("goodbye").into_errors(), vec![err]);
3866 }
3867
// Checks the error reported by a recovering parser wrapped in `labelled_with` + `as_context`.
#[test]
fn labelled_with() {
    use crate::label::LabelError;

    fn parser<'src>() -> impl Parser<'src, &'src str, (), extra::Err<Rich<'src, char>>> {
        // Recovery via `empty()` is attached before the label is applied.
        let recovering = just("hello")
            .ignored()
            .recover_with(via_parser(empty()));
        recovering.labelled_with(|| "greeting").as_context()
    }

    // Expected: 'h' was expected at 0..0, nothing found, inside the "greeting" context at 0..0.
    let mut expected =
        <Rich<_> as LabelError<&str, char>>::expected_found(['h'], None, (0..0).into());
    <Rich<_, _> as LabelError<&str, _>>::in_context(&mut expected, "greeting", (0..0).into());

    let errors = parser().parse("").into_errors();
    assert_eq!(errors, vec![expected]);
}
3885
// An unsupported escape inside a string literal must be reported at the offending character.
#[test]
#[allow(dead_code)]
fn invalid_escape() {
    use crate::LabelError;

    // A double-quoted string whose only supported escape sequence is `\n`.
    fn string<'src>() -> impl Parser<'src, &'src str, &'src str, extra::Err<Rich<'src, char>>> {
        let delimiter = just("\"");
        let escape_seq = just("\\").then(just("n"));
        let plain_char = none_of("\\\"");

        let body_item = plain_char.ignored().or(escape_seq.ignored());
        body_item
            .repeated()
            .to_slice()
            .delimited_by(delimiter, delimiter)
    }

    // `\m` is not a valid escape: 'n' was expected where 'm' was found (7..8).
    let expected = <Rich<char> as LabelError::<&str, char>>::expected_found(
        ['n'],
        Some('m'.into()),
        (7..8).into(),
    );

    let result = string().parse(r#""Hello\m""#).into_result();
    assert_eq!(result, Err(vec![expected]));
}
3915
// Checks which expectations survive when `map_err` relabels an error inside a `group`.
#[test]
#[allow(dead_code)]
fn map_err_missed_info() {
    use crate::{extra::Err, LabelError};

    // `just("b")` has its error relabelled to 'l' by `map_err`.
    fn erroneous_map_err<'src>() -> impl Parser<'src, &'src str, (), Err<Rich<'src, char>>> {
        let optional_a = just("a").or_not();
        let relabelled_b = just("b").map_err(|mut e| {
            LabelError::<&str, _>::label_with(&mut e, 'l');
            e
        });
        group((optional_a, relabelled_b)).ignored()
    }

    // Both the optional 'a' and the relabelled 'l' are expected at 0..1.
    let expected_a_or_l: Rich<char> = LabelError::<&str, _>::expected_found(
        ['a', 'l'],
        Some('_'.into()),
        SimpleSpan::new((), 0..1),
    );
    assert_eq!(
        erroneous_map_err().parse("_").into_output_errors(),
        (None, vec![expected_a_or_l]),
    );

    // Here `map_err` wraps `empty()`, and the label 'l' is absent from the expected error.
    fn erroneous_then<'src>() -> impl Parser<'src, &'src str, (), Err<Rich<'src, char>>> {
        let optional_a = just("a").or_not();
        let relabelled_empty = empty().map_err(|mut e| {
            LabelError::<&str, _>::label_with(&mut e, 'l');
            e
        });
        group((optional_a, relabelled_empty, just("c"))).ignored()
    }

    let expected_a_or_c: Rich<char> = LabelError::<&str, _>::expected_found(
        ['a', 'c'],
        Some('_'.into()),
        SimpleSpan::new((), 0..1),
    );
    assert_eq!(
        erroneous_then().parse("_").into_output_errors(),
        (None, vec![expected_a_or_c]),
    );
}
3968
// `map_err` receives the original error and may rebuild it from its parts.
#[test]
fn map_err() {
    use crate::LabelError;

    // The closure prints the incoming error's parts and rebuilds an error from them.
    let parser = just::<char, &str, extra::Err<_>>('"').map_err(|original: Rich<char>| {
        println!("Found = {:?}", original.found());
        println!("Expected = {:?}", original.expected().collect::<Vec<_>>());
        println!("Span = {:?}", original.span());
        LabelError::<&str, char>::expected_found(
            ['"'],
            original.found().copied().map(Into::into),
            *original.span(),
        )
    });

    // A quote was expected, 'H' was found at 0..1.
    let expected: Rich<char> =
        LabelError::<&str, char>::expected_found(['"'], Some('H'.into()), (0..1).into());

    let result = parser.parse(r#"H"#).into_result();
    assert_eq!(result, Err(vec![expected]));
}
3993
// `map_err_with` additionally exposes the parser context to the error-mapping closure.
#[test]
fn map_err_with() {
    use crate::LabelError;

    // The closure builds its expectation text from the context value.
    let quote_after_hashes = just('"').map_err_with(|original: Rich<char>, extras| {
        println!("Found = {:?}", original.found());
        println!("Expected = {:?}", original.expected().collect::<Vec<_>>());
        println!("Span = {:?}", original.span());
        println!("Context = {:?}", extras.ctx());
        LabelError::<&str, String>::expected_found(
            [format!("after {} hashes", extras.ctx())],
            original.found().copied().map(Into::into),
            *original.span(),
        )
    });

    // The number of leading '#' characters is passed along as the context.
    let parser = just::<char, &str, extra::Err<_>>('#')
        .repeated()
        .count()
        .ignore_with_ctx(quote_after_hashes);

    // Expected: the '#' expectation merged with the custom "after 3 hashes" one, both at 3..4.
    let hash_err: Rich<_> =
        LabelError::<&str, char>::expected_found(['#'], Some('H'.into()), (3..4).into());
    let expected = LabelError::<&str, String>::merge_expected_found(
        hash_err,
        ["after 3 hashes".into()],
        Some('H'.into()),
        (3..4).into(),
    );

    assert_eq!(parser.parse("###H").into_result(), Err(vec![expected]));
}
4023
// An optional parser wrapped in `try_map` must still contribute its expectation.
#[test]
fn try_map() {
    use crate::{DefaultExpected, LabelError};

    let maybe_a = just("a").or_not();
    let maybe_b = just("b").try_map(|_, _| Ok(())).or_not();
    let then_c = just::<_, &str, extra::Err<Rich<_>>>("c");
    let parser = group((maybe_a, maybe_b, then_c)).ignored();

    // On empty input, all three tokens are expected at 0..0 and nothing is found.
    let expected: Rich<char> = LabelError::<&str, _>::expected_found(
        vec![
            DefaultExpected::Token('a'.into()),
            DefaultExpected::Token('b'.into()),
            DefaultExpected::Token('c'.into()),
        ],
        None,
        SimpleSpan::new((), 0..0),
    );

    assert_eq!(
        parser.parse("").into_output_errors(),
        (None, vec![expected])
    );
}
4051
// Counterpart of the `try_map` test for `try_map_with`: the expectation must be preserved.
#[test]
fn try_map_with() {
    use crate::{DefaultExpected, LabelError};

    let maybe_a = just("a").or_not();
    let maybe_b = just("b").try_map_with(|_, _| Ok(())).or_not();
    let then_c = just::<_, &str, extra::Err<Rich<_>>>("c");
    let parser = group((maybe_a, maybe_b, then_c)).ignored();

    // On empty input, all three tokens are expected at 0..0 and nothing is found.
    let expected: Rich<char> = LabelError::<&str, _>::expected_found(
        vec![
            DefaultExpected::Token('a'.into()),
            DefaultExpected::Token('b'.into()),
            DefaultExpected::Token('c'.into()),
        ],
        None,
        SimpleSpan::new((), 0..0),
    );

    assert_eq!(
        parser.parse("").into_output_errors(),
        (None, vec![expected])
    );
}
4079
// Checks the errors produced when a `filter` predicate rejects an otherwise successful parse.
#[test]
fn filter() {
    use crate::{DefaultExpected, LabelError};

    // Case 1: a lone parser whose filter always rejects.
    let always_rejected = just::<_, _, extra::Err<Rich<_>>>("a").filter(|_| false);

    let rejected_err: Rich<char> = LabelError::<&str, _>::expected_found(
        [DefaultExpected::SomethingElse],
        Some('a'.into()),
        SimpleSpan::new((), 0..1),
    );
    assert_eq!(
        always_rejected.parse("a").into_result(),
        Err(vec![rejected_err])
    );

    // Case 2: the rejecting filter sits in an optional position between two other parsers.
    let grouped = group((
        just("a").or_not(),
        just("b").filter(|_| false).or_not(),
        just::<_, &str, extra::Err<Rich<_>>>("c"),
    ));

    // The filter's `SomethingElse` is listed alongside the neighbouring token expectations.
    let grouped_err: Rich<char> = LabelError::<&str, _>::expected_found(
        vec![
            DefaultExpected::Token('a'.into()),
            DefaultExpected::SomethingElse,
            DefaultExpected::Token('c'.into()),
        ],
        Some('b'.into()),
        SimpleSpan::new((), 0..1),
    );
    assert_eq!(
        grouped.parse("b").into_output_errors(),
        (None, vec![grouped_err])
    );
}
4117
// Checks the error reported when a parser runs after a rewound lookahead.
#[test]
fn rewind() {
    use crate::{DefaultExpected, LabelError};

    let lookahead = group((just("a"), any(), just("b").or_not())).rewind();
    let parser = lookahead.then(just::<_, _, extra::Err<Rich<_>>>("ac"));

    // The only error expected is 'c' missing where 'd' was found (1..2).
    let expected: Rich<char> = LabelError::<&str, _>::expected_found(
        [DefaultExpected::Token('c'.into())],
        Some('d'.into()),
        SimpleSpan::new((), 1..2),
    );

    assert_eq!(
        parser.parse("ad").into_output_errors(),
        (None, vec![expected])
    );
}
4138
// `separated_by` over an optional item must accept input that begins with separators.
#[test]
fn separated_by() {
    use crate::{error::Simple, extra};

    let optional_a = just::<_, &str, extra::Err<Simple<_>>>("a").or_not();
    let parser = optional_a.separated_by(just("b"));

    let result = parser.parse("bba").into_result();
    assert_eq!(result, Ok(()));
}
4149
// A `custom` parser that always returns `Ok(())` must still yield an erroring parse here.
#[test]
fn zero_size_custom_failure() {
    // Tries to consume "foo", rewinds if that fails, and returns `Ok(())` either way.
    fn my_custom<'src>() -> impl Parser<'src, &'src str, ()> {
        custom(|inp| {
            let checkpoint = inp.save();
            let foo_failed = inp.parse(just("foo")).is_err();
            if foo_failed {
                inp.rewind(checkpoint);
            }
            Ok(())
        })
    }

    let result = my_custom().parse("not foo");
    assert!(result.has_errors());
}
4164
// The reported error must merge the expectations of every parser that could have continued.
#[test]
fn labels() {
    use crate::{DefaultExpected, Error, LabelError, TextExpected};

    let parser = just("a")
        .or_not()
        .then(text::whitespace::<&str, extra::Err<Rich<_>>>());

    // Three separate expectations, all at 0..1 with 'b' found.
    let wanted_a: Rich<char> = LabelError::<&str, _>::expected_found(
        vec![DefaultExpected::Token('a'.into())],
        Some('b'.into()),
        SimpleSpan::new((), 0..1),
    );
    let wanted_whitespace: Rich<char> = LabelError::<&str, _>::expected_found(
        vec![TextExpected::<&str>::Whitespace],
        Some('b'.into()),
        SimpleSpan::new((), 0..1),
    );
    let wanted_end: Rich<char> = LabelError::<&str, _>::expected_found(
        vec![DefaultExpected::EndOfInput],
        Some('b'.into()),
        SimpleSpan::new((), 0..1),
    );

    // Merged in the same nesting order as before: (a + whitespace) + end of input.
    let token_or_space = Error::<&str>::merge(wanted_a, wanted_whitespace);
    let expected = Error::<&str>::merge(token_or_space, wanted_end);

    assert_eq!(
        parser.parse("b").into_output_errors(),
        (None, vec![expected])
    );
}
4199
// A label applied to a `not()` parser must show up on the error it produces.
#[test]
fn labelled_not() {
    use crate::{DefaultExpected, LabelError};

    let parser = any::<_, extra::Err<Rich<_>>>().not().labelled("label");

    // Expected: a `SomethingElse` expectation at 0..1, then labelled "label".
    let mut expected: Rich<char> = LabelError::<&str, _>::expected_found(
        [DefaultExpected::SomethingElse],
        Some('b'.into()),
        SimpleSpan::new((), 0..1),
    );
    LabelError::<&str, _>::label_with(&mut expected, "label");

    let outcome = parser.parse("b").into_output_errors();
    assert_eq!(outcome, (None, vec![expected]));
}
4214
// Checks the contents of a `TruncateState` after a parser that mutated it is rewound.
#[test]
fn state_rewind() {
    use crate::{extra::Full, inspector::TruncateState};

    // Each parsed token is pushed onto the state; the output is the index it was stored at.
    let recorder = any::<_, Full<EmptyErr, TruncateState<char>, ()>>().map_with(|tok, e| {
        e.state().0.push(tok);
        e.state().0.len() - 1
    });
    let parser = recorder.rewind().then_ignore(any());

    let mut state = TruncateState::default();
    let index = parser.parse_with_state("a", &mut state).unwrap();

    // The token was stored at index 0 and the state ends up holding exactly that token.
    assert_eq!(index, 0);
    assert_eq!(state.0.as_slice(), ['a']);
}
4232
// Checks that an error emitted by `validate` is reported exactly once after a `rewind`.
#[test]
fn error_rewind() {
    // Passes the token through unchanged, but always emits a secondary error.
    let noisy = any::<_, extra::Default>().validate(|tok, _, emitter| {
        emitter.emit(EmptyErr::default());
        tok
    });
    let parser = noisy.rewind().then_ignore(any());

    let outcome = parser.parse("a").into_output_errors();
    assert_eq!(outcome, (Some('a'), vec![EmptyErr::default()]));
}
4248
// Checks the errors reported when a `choice` branch that emitted a secondary error then fails.
#[test]
fn secondary_error_choice() {
    // Emits a secondary error for whatever token it sees, then requires a 'c'.
    let secondary_error = any::<_, extra::Default>()
        .validate(|tok, _, emitter| {
            emitter.emit(EmptyErr::default());
            tok
        })
        .then(just('c'));
    let a_then_b = just('a').then(just('b'));
    let parser = choice((a_then_b, secondary_error));

    // Two errors and no output are expected for "aa".
    let outcome = parser.parse("aa").into_output_errors();
    assert_eq!(
        outcome,
        (None, vec![EmptyErr::default(), EmptyErr::default()])
    );
}
4264
4265 /*
4266 #[test]
4267 fn label_sets() {
4268 use crate::{DefaultExpected, Error, LabelError, TextExpected, text::whitespace};
4269
4270 fn tuple<'input>() -> impl Parser<'input, &'input str, (), extra::Err<Rich<'input, char, SimpleSpan>>> {
4271 just("a")
4272 .repeated()
4273 .then_ignore(whitespace())
4274 .separated_by(just(","))
4275 .then_ignore(just(")"))
4276 }
4277
4278 assert_eq!(
4279 tuple().parse("a").into_output_errors(),
4280 (
4281 None,
4282 vec![Error::<&str>::merge(
4283 LabelError::<&str, _>::expected_found(
4284 vec![TextExpected::<&str>::Whitespace],
4285 None,
4286 SimpleSpan::new((), 1..1)
4287 ),
4288 LabelError::<&str, _>::expected_found(
4289 vec![
4290 DefaultExpected::Token('a'.into()),
4291 DefaultExpected::Token(','.into()),
4292 DefaultExpected::Token(')'.into()),
4293 ],
4294 None,
4295 SimpleSpan::new((), 1..1)
4296 )
4297 )]
4298 )
4299 );
4300 }
4301 */
4302
// Regression guard: parsing with a labelled parser under recovery must not panic.
// Only the absence of a panic is checked; the parse result itself is discarded.
#[test]
fn labelled_recovery_dont_panic() {
    fn parser<'i>() -> impl Parser<'i, &'i str, SimpleSpan> {
        let boolean = choice((just("true"), just("false")))
            .labelled("boolean")
            .to_span();

        // The single-element outer `choice` is kept exactly as in the original test.
        choice((boolean,))
            .recover_with(via_parser(any().and_is(text::newline().not()).to_span()))
    }

    let _ = parser().parse("tru");
}
4315
// A parser configured to reject everything must report an error with no expectations.
#[test]
fn expected_nothing() {
    fn parser<'i>() -> impl Parser<'i, &'i str, &'i str, extra::Err<Rich<'i, char>>> {
        just("foo").contextual().configure(|_, _| false)
    }

    let errs = parser().parse("foo").into_errors();
    assert_eq!(errs.len(), 1);

    // The parser is unsatisfiable, so nothing should be expected
    let expected_count = errs[0].expected().len();
    assert_eq!(expected_count, 0);
}
4327
// Regression test for https://codeberg.org/zesterer/chumsky/issues/985
//
// Errors emitted from `validate` inside a labelled context parser must each carry
// exactly one context whose span covers that parser's own tokens.
#[test]
fn labelled_context_validate() {
    use crate::input::MappedInput;

    type Span = SimpleSpan;
    type Token = Spanned<char, Span>;
    type ParserInput<'tokens> = MappedInput<'tokens, char, Span, &'tokens [Token]>;
    type ParseError<'tokens> = extra::Err<Rich<'tokens, char, Span>>;

    // Parses repeated "a" + ('b' | 'c') pairs: 'b' yields `true`; 'c' yields `false`
    // and emits a custom error at the span of the 'c'.
    fn parser<'tokens>(
    ) -> impl Parser<'tokens, ParserInput<'tokens>, Vec<bool>, ParseError<'tokens>> {
        let single = just("a")
            .ignore_then(one_of("bc").spanned())
            .validate(|spanned, _, emitter| {
                let Spanned { inner, span } = spanned;
                match inner {
                    'c' => {
                        emitter.emit(Rich::custom(span, "expected 'b' but found 'c'"));
                        false
                    }
                    'b' => true,
                    _ => unreachable!(),
                }
            })
            .labelled("SINGLE PARSER")
            .as_context();
        single.repeated().collect()
    }

    // Small helper so the token table below stays readable.
    let tok = |inner: char, range: core::ops::Range<usize>| Token {
        inner,
        span: Span::from(range),
    };

    // Conceptually represents the source string "ac ab ac"; the spaces are not
    // tokens, which is why the spans skip offsets 2 and 5.
    let tokens = vec![
        tok('a', 0..1),
        tok('c', 1..2),
        tok('a', 3..4),
        tok('b', 4..5),
        tok('a', 6..7),
        tok('c', 7..8),
    ];
    let eoi = Span::from(8..8);

    let (output, errors) = parser()
        .parse(tokens.split_spanned(eoi))
        .into_output_errors();

    assert_eq!(output, Some(vec![false, true, false]));
    assert_eq!(errors.len(), 2);
    match &errors[..] {
        [first, second] => {
            // First 'c' sits at 1..2; its context spans the first pair, 0..2.
            let first_contexts: Vec<_> = first.contexts().collect();
            assert_eq!(*first.span(), Span::from(1..2));
            assert_eq!(first_contexts.len(), 1);
            assert_eq!(*first_contexts[0].1, Span::from(0..2));

            // Second 'c' sits at 7..8; its context spans the third pair, 6..8.
            let second_contexts: Vec<_> = second.contexts().collect();
            assert_eq!(*second.span(), Span::from(7..8));
            assert_eq!(second_contexts.len(), 1);
            assert_eq!(*second_contexts[0].1, Span::from(6..8));
        }
        _ => unreachable!(),
    }
}
4407}