pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = Default>: ParserSealed<'a, I, O, E> {
Show 46 methods
// Provided methods
fn parse(&self, input: I) -> ParseResult<O, E::Error>
where I: Input<'a>,
E::State: Default,
E::Context: Default { ... }
fn parse_with_state(
&self,
input: I,
state: &mut E::State
) -> ParseResult<O, E::Error>
where I: Input<'a>,
E::Context: Default { ... }
fn check(&self, input: I) -> ParseResult<(), E::Error>
where Self: Sized,
I: Input<'a>,
E::State: Default,
E::Context: Default { ... }
fn check_with_state(
&self,
input: I,
state: &mut E::State
) -> ParseResult<(), E::Error>
where Self: Sized,
I: Input<'a>,
E::Context: Default { ... }
fn to_slice(self) -> ToSlice<Self, O>
where Self: Sized { ... }
fn filter<F: Fn(&O) -> bool>(self, f: F) -> Filter<Self, F>
where Self: Sized { ... }
fn map<U, F: Fn(O) -> U>(self, f: F) -> Map<Self, O, F>
where Self: Sized { ... }
fn map_with<U, F: Fn(O, &mut MapExtra<'a, '_, I, E>) -> U>(
self,
f: F
) -> MapWith<Self, O, F>
where Self: Sized { ... }
fn map_group<F: Fn<O>>(self, f: F) -> MapGroup<Self, O, F>
where Self: Sized,
O: Tuple { ... }
fn to_span(self) -> ToSpan<Self, O>
where Self: Sized { ... }
fn try_map<U, F: Fn(O, I::Span) -> Result<U, E::Error>>(
self,
f: F
) -> TryMap<Self, O, F>
where Self: Sized { ... }
fn try_map_with<U, F: Fn(O, &mut MapExtra<'a, '_, I, E>) -> Result<U, E::Error>>(
self,
f: F
) -> TryMapWith<Self, O, F>
where Self: Sized { ... }
fn ignored(self) -> Ignored<Self, O>
where Self: Sized { ... }
fn memoized(self) -> Memoized<Self>
where Self: Sized { ... }
fn to<U: Clone>(self, to: U) -> To<Self, O, U>
where Self: Sized { ... }
fn labelled<L>(self, label: L) -> Labelled<Self, L>
where Self: Sized,
E::Error: LabelError<'a, I, L> { ... }
fn then<U, B: Parser<'a, I, U, E>>(self, other: B) -> Then<Self, B, O, U, E>
where Self: Sized { ... }
fn ignore_then<U, B: Parser<'a, I, U, E>>(
self,
other: B
) -> IgnoreThen<Self, B, O, E>
where Self: Sized { ... }
fn then_ignore<U, B: Parser<'a, I, U, E>>(
self,
other: B
) -> ThenIgnore<Self, B, U, E>
where Self: Sized { ... }
fn nested_in<B: Parser<'a, I, I, E>>(
self,
other: B
) -> NestedIn<Self, B, O, E>
where Self: Sized,
I: 'a { ... }
fn ignore_with_ctx<U, P>(
self,
then: P
) -> IgnoreWithCtx<Self, P, O, I, Full<E::Error, E::State, O>>
where Self: Sized,
O: 'a,
P: Parser<'a, I, U, Full<E::Error, E::State, O>> { ... }
fn then_with_ctx<U, P>(
self,
then: P
) -> ThenWithCtx<Self, P, O, I, Full<E::Error, E::State, O>>
where Self: Sized,
O: 'a,
P: Parser<'a, I, U, Full<E::Error, E::State, O>> { ... }
fn with_ctx<Ctx>(self, ctx: Ctx) -> WithCtx<Self, Ctx>
where Self: Sized,
Ctx: 'a + Clone { ... }
fn with_state<State>(self, state: State) -> WithState<Self, State>
where Self: Sized,
State: 'a + Clone { ... }
fn and_is<U, B>(self, other: B) -> AndIs<Self, B, U>
where Self: Sized,
B: Parser<'a, I, U, E> { ... }
fn delimited_by<U, V, B, C>(
self,
start: B,
end: C
) -> DelimitedBy<Self, B, C, U, V>
where Self: Sized,
B: Parser<'a, I, U, E>,
C: Parser<'a, I, V, E> { ... }
fn padded_by<U, B>(self, padding: B) -> PaddedBy<Self, B, U>
where Self: Sized,
B: Parser<'a, I, U, E> { ... }
fn or<B>(self, other: B) -> Or<Self, B>
where Self: Sized,
B: Parser<'a, I, O, E> { ... }
fn or_not(self) -> OrNot<Self>
where Self: Sized { ... }
fn not(self) -> Not<Self, O>
where Self: Sized { ... }
fn repeated(self) -> Repeated<Self, O, I, E>
where Self: Sized { ... }
fn separated_by<U, B>(
self,
separator: B
) -> SeparatedBy<Self, B, O, U, I, E>
where Self: Sized,
B: Parser<'a, I, U, E> { ... }
fn foldl<B, F, OB>(self, other: B, f: F) -> Foldl<F, Self, B, OB, E>
where F: Fn(O, OB) -> O,
B: IterParser<'a, I, OB, E>,
Self: Sized { ... }
fn foldl_with<B, F, OB>(
self,
other: B,
f: F
) -> FoldlWith<F, Self, B, OB, E>
where F: Fn(O, OB, &mut MapExtra<'a, '_, I, E>) -> O,
B: IterParser<'a, I, OB, E>,
Self: Sized { ... }
fn rewind(self) -> Rewind<Self>
where Self: Sized { ... }
fn lazy(self) -> Lazy<'a, Self, I, E>
where Self: Sized,
I: ValueInput<'a> { ... }
fn padded(self) -> Padded<Self>
where Self: Sized,
I: Input<'a>,
I::Token: Char { ... }
fn recover_with<S: Strategy<'a, I, O, E>>(
self,
strategy: S
) -> RecoverWith<Self, S>
where Self: Sized { ... }
fn map_err<F>(self, f: F) -> MapErr<Self, F>
where Self: Sized,
F: Fn(E::Error) -> E::Error { ... }
fn map_err_with_state<F>(self, f: F) -> MapErrWithState<Self, F>
where Self: Sized,
F: Fn(E::Error, I::Span, &mut E::State) -> E::Error { ... }
fn validate<U, F>(self, f: F) -> Validate<Self, O, F>
where Self: Sized,
F: Fn(O, &mut MapExtra<'a, '_, I, E>, &mut Emitter<E::Error>) -> U { ... }
fn from_str<U>(self) -> Map<Self, O, fn(_: O) -> Result<U, U::Err>>
where Self: Sized,
U: FromStr,
O: AsRef<str> { ... }
fn unwrapped(self) -> Unwrapped<Self, O>
where Self: Sized { ... }
fn into_iter(self) -> IntoIter<Self, O>
where Self: Sized,
O: IntoIterator { ... }
fn boxed<'b>(self) -> Boxed<'a, 'b, I, O, E>
where Self: MaybeSync + Sized + 'a + 'b { ... }
fn pratt<Ops>(self, ops: Ops) -> Pratt<Self, Ops>
where Self: Sized { ... }
}
Expand description
A trait implemented by parsers.
Parsers take inputs of type I
, which will implement Input
. Refer to the documentation on Input
for examples
of common input types. It will then attempt to parse them into a value of type O
, which may be just about any type.
In doing so, they may encounter errors. These need not be fatal to the parsing process: syntactic errors can be
recovered from and a valid output may still be generated alongside any syntax errors that were encountered along the
way. Usually, this output comes in the form of an
Abstract Syntax Tree (AST).
The final type parameter, E
, is expected to be one of the types in the extra
module,
implementing ParserExtra
. This trait is used to encapsulate the various types a parser
uses that are not simply its input and output. Refer to the documentation on the ParserExtra
trait
for more detail on the contained types. If not provided, it will default to extra::Default
,
which will have the least overhead, but also the least meaningful errors.
The lifetime of the parser is used for zero-copy output - the input is bound by the lifetime, and returned values or parser state may take advantage of this to borrow tokens or slices of the input and hold on to them, if the input supports this.
You cannot directly implement this trait yourself. If you feel like the built-in parsers are not enough for you, there are several options in increasing order of complexity:
- Try using combinators like Parser::try_map and Parser::validate to implement custom error generation
- Use custom to implement your own parsing logic inline within an existing parser
- Use chumsky’s extension API to write an extension parser that feels like it’s native to chumsky
- If you believe you’ve found a common use-case that’s missing from chumsky, you could open a pull request to implement it in chumsky itself.
Provided Methods§
sourcefn parse(&self, input: I) -> ParseResult<O, E::Error>
fn parse(&self, input: I) -> ParseResult<O, E::Error>
Parse a stream of tokens, yielding an output if possible, and any errors encountered along the way.
If None
is returned (i.e: parsing failed) then there will always be at least one item in the error Vec
.
If you want to include non-default state, use Parser::parse_with_state
instead.
Although the signature of this function looks complicated, it’s simpler than you think! You can pass a
&[T]
, a &str
, a Stream
, or anything implementing Input
to it.
sourcefn parse_with_state(
&self,
input: I,
state: &mut E::State
) -> ParseResult<O, E::Error>
fn parse_with_state( &self, input: I, state: &mut E::State ) -> ParseResult<O, E::Error>
Parse a stream of tokens, yielding an output if possible, and any errors encountered along the way.
The provided state will be passed on to parsers that expect it, such as map_with
.
If None
is returned (i.e: parsing failed) then there will always be at least one item in the error Vec
.
If you want to just use a default state value, use Parser::parse
instead.
Although the signature of this function looks complicated, it’s simpler than you think! You can pass a
&[T]
, a &str
, a Stream
, or anything implementing Input
to it.
sourcefn check(&self, input: I) -> ParseResult<(), E::Error>
fn check(&self, input: I) -> ParseResult<(), E::Error>
Parse a stream of tokens, ignoring any output, and returning any errors encountered along the way.
If parsing failed, then there will always be at least one item in the returned Vec
.
If you want to include non-default state, use Parser::check_with_state
instead.
Although the signature of this function looks complicated, it’s simpler than you think! You can pass a
&[T]
, a &str
, a Stream
, or anything implementing Input
to it.
sourcefn check_with_state(
&self,
input: I,
state: &mut E::State
) -> ParseResult<(), E::Error>
fn check_with_state( &self, input: I, state: &mut E::State ) -> ParseResult<(), E::Error>
Parse a stream of tokens, ignoring any output, and returning any errors encountered along the way.
If parsing failed, then there will always be at least one item in the returned Vec
.
If you want to just use a default state value, use Parser::check
instead.
Although the signature of this function looks complicated, it’s simpler than you think! You can pass a
&[T]
, a &str
, a Stream
, or anything implementing Input
to it.
sourcefn to_slice(self) -> ToSlice<Self, O>where
Self: Sized,
fn to_slice(self) -> ToSlice<Self, O>where
Self: Sized,
Convert the output of this parser into a slice of the input, based on the current parser’s span.
sourcefn filter<F: Fn(&O) -> bool>(self, f: F) -> Filter<Self, F>where
Self: Sized,
fn filter<F: Fn(&O) -> bool>(self, f: F) -> Filter<Self, F>where
Self: Sized,
Filter the output of this parser, accepting only outputs that match the given predicate.
The output type of this parser is O
, the same as the original parser: outputs that satisfy the predicate are passed through unchanged.
Examples
let lowercase = any::<_, extra::Err<Simple<char>>>()
.filter(char::is_ascii_lowercase)
.repeated()
.at_least(1)
.collect::<String>();
assert_eq!(lowercase.parse("hello").into_result(), Ok("hello".to_string()));
assert!(lowercase.parse("Hello").has_errors());
sourcefn map<U, F: Fn(O) -> U>(self, f: F) -> Map<Self, O, F>where
Self: Sized,
fn map<U, F: Fn(O) -> U>(self, f: F) -> Map<Self, O, F>where
Self: Sized,
Map the output of this parser to another value.
The output type of this parser is U
, the same as the function’s output.
Examples
#[derive(Debug, PartialEq)]
enum Token { Word(String), Num(u64) }
let word = any::<_, extra::Err<Simple<char>>>()
.filter(|c: &char| c.is_alphabetic())
.repeated().at_least(1)
.collect::<String>()
.map(Token::Word);
let num = any::<_, extra::Err<Simple<char>>>()
.filter(|c: &char| c.is_ascii_digit())
.repeated().at_least(1)
.collect::<String>()
.map(|s| Token::Num(s.parse().unwrap()));
let token = word.or(num);
assert_eq!(token.parse("test").into_result(), Ok(Token::Word("test".to_string())));
assert_eq!(token.parse("42").into_result(), Ok(Token::Num(42)));
sourcefn map_with<U, F: Fn(O, &mut MapExtra<'a, '_, I, E>) -> U>(
self,
f: F
) -> MapWith<Self, O, F>where
Self: Sized,
fn map_with<U, F: Fn(O, &mut MapExtra<'a, '_, I, E>) -> U>(
self,
f: F
) -> MapWith<Self, O, F>where
Self: Sized,
Map the output of this parser to another value, with the opportunity to get extra metadata.
The output type of this parser is U
, the same as the function’s output.
Examples
Using the span of the output in the mapping function:
// It's common for AST nodes to use a wrapper type that allows attaching span information to them
#[derive(Debug, PartialEq)]
pub struct Spanned<T>(T, SimpleSpan<usize>);
let ident = text::ascii::ident::<_, _, extra::Err<Simple<char>>>()
.map_with(|ident, e| Spanned(ident, e.span())) // Equivalent to `.map_with_span(|ident, span| Spanned(ident, span))`
.padded();
assert_eq!(ident.parse("hello").into_result(), Ok(Spanned("hello", (0..5).into())));
assert_eq!(ident.parse("       hello       ").into_result(), Ok(Spanned("hello", (7..12).into())));
Using the parser state in the mapping function to intern strings:
use std::ops::Range;
use lasso::{Rodeo, Spur};
// It's common for AST nodes to use interned versions of identifiers
// Keys are generally smaller, faster to compare, and can be `Copy`
#[derive(Copy, Clone)]
pub struct Ident(Spur);
let ident = text::ascii::ident::<_, _, extra::Full<Simple<char>, Rodeo, ()>>()
.map_with(|ident, e| Ident(e.state().get_or_intern(ident)))
.padded()
.repeated()
.at_least(1)
.collect::<Vec<_>>();
// Test out parser
let mut interner = Rodeo::new();
match ident.parse_with_state("hello", &mut interner).into_result() {
Ok(idents) => {
assert_eq!(interner.resolve(&idents[0].0), "hello");
}
Err(e) => panic!("Parsing Failed: {:?}", e),
}
match ident.parse_with_state("hello hello", &mut interner).into_result() {
Ok(idents) => {
assert_eq!(idents[0].0, idents[1].0);
}
Err(e) => panic!("Parsing Failed: {:?}", e),
}
Using the parse context in the mapping function:
fn palindrome_parser<'a>() -> impl Parser<'a, &'a str, String> {
recursive(|chain| {
choice((
just(String::new())
.configure(|cfg, ctx: &String| cfg.seq(ctx.clone()))
.then_ignore(end()),
any()
.map_with(|x, e| format!("{x}{}", e.ctx()))
.ignore_with_ctx(chain),
))
})
.with_ctx(String::new())
}
assert_eq!(palindrome_parser().parse("abccba").into_result().as_deref(), Ok("cba"));
assert_eq!(palindrome_parser().parse("hello  olleh").into_result().as_deref(), Ok(" olleh"));
assert!(palindrome_parser().parse("abccb").into_result().is_err());
sourcefn map_group<F: Fn<O>>(self, f: F) -> MapGroup<Self, O, F>
fn map_group<F: Fn<O>>(self, f: F) -> MapGroup<Self, O, F>
Available on crate feature nightly only.
Map the output of this parser to another value.
If the output of this parser isn’t a tuple, use Parser::map
.
The output type of this parser is U
, the same as the function’s output.
Examples
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Value {
One(u8),
Two(u8, u8),
Three(u8, u8, u8),
}
fn parser<'a>() -> impl Parser<'a, &'a [u8], Vec<Value>> {
choice((
just(1).ignore_then(any()).map(Value::One),
just(2)
.ignore_then(group((any(), any())))
.map_group(Value::Two),
just(3)
.ignore_then(group((any(), any(), any())))
.map_group(Value::Three),
))
.repeated()
.collect()
}
let bytes = &[3, 1, 2, 3, 1, 127, 2, 21, 69];
assert_eq!(
parser().parse(bytes).into_result(),
Ok(vec![
Value::Three(1, 2, 3),
Value::One(127),
Value::Two(21, 69)
])
);
sourcefn to_span(self) -> ToSpan<Self, O>where
Self: Sized,
fn to_span(self) -> ToSpan<Self, O>where
Self: Sized,
Transform the output of this parser to the pattern’s span.
This is commonly used when you know what pattern you’ve parsed and are only interested in the span of the pattern.
The output type of this parser is I::Span
.
Examples
// It's common for AST nodes to use a wrapper type that allows attaching span information to them
#[derive(Debug, PartialEq)]
pub enum Expr<'a> {
Int(&'a str, SimpleSpan),
// The span is that of the operator, '+'
Add(Box<Expr<'a>>, SimpleSpan, Box<Expr<'a>>),
}
let int = text::int::<_, _, extra::Err<Simple<char>>>(10)
.to_slice()
.map_with(|int, e| Expr::Int(int, e.span()))
.padded();
let add_op = just('+').to_span().padded();
let sum = int.foldl(
add_op.then(int).repeated(),
|a, (op_span, b)| Expr::Add(Box::new(a), op_span, Box::new(b)),
);
assert_eq!(sum.parse("42 + 7 + 13").into_result(), Ok(Expr::Add(
Box::new(Expr::Add(
Box::new(Expr::Int("42", (0..2).into())),
(3..4).into(),
Box::new(Expr::Int("7", (5..6).into())),
)),
(7..8).into(),
Box::new(Expr::Int("13", (9..11).into())),
)));
sourcefn try_map<U, F: Fn(O, I::Span) -> Result<U, E::Error>>(
self,
f: F
) -> TryMap<Self, O, F>where
Self: Sized,
fn try_map<U, F: Fn(O, I::Span) -> Result<U, E::Error>>(
self,
f: F
) -> TryMap<Self, O, F>where
Self: Sized,
After a successful parse, apply a fallible function to the output. If the function produces an error, treat it as a parsing error.
If you wish parsing of this pattern to continue when an error is generated instead of halting, consider using
Parser::validate
instead.
The output type of this parser is U
, the Ok
return value of the function.
Examples
let byte = text::int::<_, _, extra::Err<Rich<char>>>(10)
.try_map(|s: &str, span| s
.parse::<u8>()
.map_err(|e| Rich::custom(span, e)));
assert!(byte.parse("255").has_output());
assert!(byte.parse("256").has_errors()); // Out of range
sourcefn try_map_with<U, F: Fn(O, &mut MapExtra<'a, '_, I, E>) -> Result<U, E::Error>>(
self,
f: F
) -> TryMapWith<Self, O, F>where
Self: Sized,
fn try_map_with<U, F: Fn(O, &mut MapExtra<'a, '_, I, E>) -> Result<U, E::Error>>(
self,
f: F
) -> TryMapWith<Self, O, F>where
Self: Sized,
After a successful parse, apply a fallible function to the output, with the opportunity to get extra metadata. If the function produces an error, treat it as a parsing error.
If you wish parsing of this pattern to continue when an error is generated instead of halting, consider using
Parser::validate
instead.
The output type of this parser is U
, the Ok
return value of the function.
sourcefn ignored(self) -> Ignored<Self, O>where
Self: Sized,
fn ignored(self) -> Ignored<Self, O>where
Self: Sized,
Ignore the output of this parser, yielding ()
as an output instead.
This can be used to reduce the cost of parsing by avoiding unnecessary allocations (most collections containing
ZSTs
do not allocate). For example, it’s common to
want to ignore whitespace in many grammars (see text::whitespace
).
The output type of this parser is ()
.
Examples
// A parser that parses any number of whitespace characters without allocating
let whitespace = any::<_, extra::Err<Simple<char>>>()
.filter(|c: &char| c.is_whitespace())
.ignored()
.repeated()
.collect::<Vec<_>>();
assert_eq!(whitespace.parse("    ").into_result(), Ok(vec![(); 4]));
assert!(whitespace.parse(" hello").has_errors());
sourcefn memoized(self) -> Memoized<Self>where
Self: Sized,
fn memoized(self) -> Memoized<Self>where
Self: Sized,
Available on crate feature memoization only.
Memoize the parser such that later attempts to parse the same input ‘remember’ the attempt and exit early.
If you’re finding that certain inputs produce exponential behavior in your parser, strategically applying
memoization to a ‘garden path’ rule is often an effective
way to solve the problem. At the limit, applying memoization to all combinators will turn any parser into one
with O(n) time complexity
, albeit with very significant per-element overhead and high memory usage.
Memoization also works with recursion, so this can be used to write parsers using left recursion.
sourcefn to<U: Clone>(self, to: U) -> To<Self, O, U>where
Self: Sized,
fn to<U: Clone>(self, to: U) -> To<Self, O, U>where
Self: Sized,
Transform all outputs of this parser to a predetermined value.
The output type of this parser is U
, the type of the predetermined value.
Examples
#[derive(Clone, Debug, PartialEq)]
enum Op { Add, Sub, Mul, Div }
let op = just::<_, _, extra::Err<Simple<char>>>('+').to(Op::Add)
.or(just('-').to(Op::Sub))
.or(just('*').to(Op::Mul))
.or(just('/').to(Op::Div));
assert_eq!(op.parse("+").into_result(), Ok(Op::Add));
assert_eq!(op.parse("/").into_result(), Ok(Op::Div));
sourcefn labelled<L>(self, label: L) -> Labelled<Self, L>
fn labelled<L>(self, label: L) -> Labelled<Self, L>
Available on crate feature label only.
Label this parser with the given label.
Labelling a parser makes all errors generated by the parser refer to the label rather than any sub-elements within the parser. For example, labelling a parser for an expression would yield “expected expression” errors rather than “expected integer, string, binary op, etc.” errors.
sourcefn then<U, B: Parser<'a, I, U, E>>(self, other: B) -> Then<Self, B, O, U, E>where
Self: Sized,
fn then<U, B: Parser<'a, I, U, E>>(self, other: B) -> Then<Self, B, O, U, E>where
Self: Sized,
Parse one thing and then another thing, yielding a tuple of the two outputs.
The output type of this parser is (O, U)
, a combination of the outputs of both parsers.
Examples
let word = any::<_, extra::Err<Simple<char>>>()
.filter(|c: &char| c.is_alphabetic())
.repeated()
.at_least(1)
.collect::<String>();
let two_words = word.then_ignore(just(' ')).then(word);
assert_eq!(two_words.parse("dog cat").into_result(), Ok(("dog".to_string(), "cat".to_string())));
assert!(two_words.parse("hedgehog").has_errors());
sourcefn ignore_then<U, B: Parser<'a, I, U, E>>(
self,
other: B
) -> IgnoreThen<Self, B, O, E>where
Self: Sized,
fn ignore_then<U, B: Parser<'a, I, U, E>>(
self,
other: B
) -> IgnoreThen<Self, B, O, E>where
Self: Sized,
Parse one thing and then another thing, yielding only the output of the latter.
The output type of this parser is U
, the same as the second parser.
Examples
let zeroes = any::<_, extra::Err<Simple<char>>>().filter(|c: &char| *c == '0').ignored().repeated().collect::<Vec<_>>();
let digits = any().filter(|c: &char| c.is_ascii_digit())
.repeated()
.collect::<String>();
let integer = zeroes
.ignore_then(digits)
.from_str()
.unwrapped();
assert_eq!(integer.parse("00064").into_result(), Ok(64));
assert_eq!(integer.parse("32").into_result(), Ok(32));
sourcefn then_ignore<U, B: Parser<'a, I, U, E>>(
self,
other: B
) -> ThenIgnore<Self, B, U, E>where
Self: Sized,
fn then_ignore<U, B: Parser<'a, I, U, E>>(
self,
other: B
) -> ThenIgnore<Self, B, U, E>where
Self: Sized,
Parse one thing and then another thing, yielding only the output of the former.
The output type of this parser is O
, the same as the original parser.
Examples
let word = any::<_, extra::Err<Simple<char>>>()
.filter(|c: &char| c.is_alphabetic())
.repeated()
.at_least(1)
.collect::<String>();
let punctuated = word
.then_ignore(just('!').or(just('?')).or_not());
let sentence = punctuated
.padded() // Allow for whitespace gaps
.repeated()
.collect::<Vec<_>>();
assert_eq!(
sentence.parse("hello! how are you?").into_result(),
Ok(vec![
"hello".to_string(),
"how".to_string(),
"are".to_string(),
"you".to_string(),
]),
);
sourcefn nested_in<B: Parser<'a, I, I, E>>(self, other: B) -> NestedIn<Self, B, O, E>where
Self: Sized,
I: 'a,
fn nested_in<B: Parser<'a, I, I, E>>(self, other: B) -> NestedIn<Self, B, O, E>where
Self: Sized,
I: 'a,
Parse input as part of a token-tree - using an input generated from within the current
input. In other words, this parser will attempt to create a new input stream from within
the one it is being run on, and the parser it was called on will be provided this new input.
By default, the original parser is expected to consume up to the end of the new stream. To
allow only consuming part of the stream, use Parser::lazy
to ignore trailing tokens.
The provided parser B
is expected to have both an input and output type which match the input
type of the parser it is called on. As an example, if the original parser takes an input of
Stream<Iterator<Item = T>>
, B
will be run first against that input, and is expected to
output a new Stream<Iterator<Item = T>>
which the original parser will be run against.
The output of this parser is O
, the output of the parser it is called on.
Examples
#[derive(Debug, Clone, PartialEq)]
enum Token<'a> {
Struct,
Ident(&'a str),
Item(&'a str),
Group(Vec<Token<'a>>),
}
let group = select_ref! { Token::Group(g) => g.as_slice() };
let ident = select_ref! { Token::Ident(i) => *i };
let items = select_ref! { Token::Item(i) => *i }
.repeated()
.collect::<Vec<_>>()
.nested_in(group);
let struc = just::<_, _, extra::Err<Simple<_>>>(&Token::Struct)
.ignore_then(ident)
.then(items);
let tl = struc
.repeated()
.collect::<Vec<_>>();
let tokens = [
Token::Struct,
Token::Ident("foo"),
Token::Group(vec![
Token::Item("a"),
Token::Item("b"),
]),
];
assert_eq!(tl.parse(&tokens).into_result(), Ok(vec![("foo", vec!["a", "b"])]));
sourcefn ignore_with_ctx<U, P>(
self,
then: P
) -> IgnoreWithCtx<Self, P, O, I, Full<E::Error, E::State, O>>
fn ignore_with_ctx<U, P>( self, then: P ) -> IgnoreWithCtx<Self, P, O, I, Full<E::Error, E::State, O>>
Parse one thing and then another thing, creating the second parser from the result of
the first. If you need the context in the output, use Parser::then_with_ctx
.
The output of this parser is U
, the result of the second parser.
Error recovery for this parser may be sub-optimal: if the first parser succeeds on recovery and the second then produces an error, the primary error will point to the location in the second parser which failed, ignoring that the first parser may be the root cause. There may be other pathological error cases as well.
Examples
let successor = just(b'\0').configure(|cfg, ctx: &u8| cfg.seq(*ctx + 1));
// A parser that parses a single letter and then its successor
let successive_letters = one_of::<_, _, extra::Err<Simple<u8>>>(b'a'..=b'z')
.ignore_with_ctx(successor);
assert_eq!(successive_letters.parse(b"ab").into_result(), Ok(b'b')); // 'b' follows 'a'
assert!(successive_letters.parse(b"ac").has_errors()); // 'c' does not follow 'a'
sourcefn then_with_ctx<U, P>(
self,
then: P
) -> ThenWithCtx<Self, P, O, I, Full<E::Error, E::State, O>>
fn then_with_ctx<U, P>( self, then: P ) -> ThenWithCtx<Self, P, O, I, Full<E::Error, E::State, O>>
Parse one thing and then another thing, creating the second parser from the result of
the first. If you don’t need the context in the output, prefer Parser::ignore_with_ctx
.
The output of this parser is (E::Context, O)
,
a combination of the context and the output of the parser.
Error recovery for this parser may be sub-optimal: if the first parser succeeds on recovery and the second then produces an error, the primary error will point to the location in the second parser which failed, ignoring that the first parser may be the root cause. There may be other pathological error cases as well.
sourcefn with_ctx<Ctx>(self, ctx: Ctx) -> WithCtx<Self, Ctx>
fn with_ctx<Ctx>(self, ctx: Ctx) -> WithCtx<Self, Ctx>
Run the previous contextual parser with the provided context.
let generic = just(b'0').configure(|cfg, ctx: &u8| cfg.seq(*ctx));
let parse_a = just::<_, _, extra::Default>(b'b').ignore_then(generic.with_ctx::<u8>(b'a'));
let parse_b = just::<_, _, extra::Default>(b'a').ignore_then(generic.with_ctx(b'b'));
assert_eq!(parse_a.parse(b"ba" as &[_]).into_result(), Ok::<_, Vec<EmptyErr>>(b'a'));
assert!(parse_a.parse(b"bb").has_errors());
assert_eq!(parse_b.parse(b"ab" as &[_]).into_result(), Ok(b'b'));
assert!(parse_b.parse(b"aa").has_errors());
sourcefn with_state<State>(self, state: State) -> WithState<Self, State>
fn with_state<State>(self, state: State) -> WithState<Self, State>
TODO
sourcefn and_is<U, B>(self, other: B) -> AndIs<Self, B, U>
fn and_is<U, B>(self, other: B) -> AndIs<Self, B, U>
Applies both parsers to the same position in the input, succeeding
only if both succeed. The returned value will be that of the first parser,
and the input will be at the end of the first parser if and_is
succeeds.
The second parser is allowed to consume more or less input than the first parser, but like its output, how much it consumes won’t affect the final result.
The motivating use-case is in combination with Parser::not
, allowing a parser
to consume something only if it isn’t also something like an escape sequence or a nested block.
Examples
let escape = just("\\n").to('\n');
// C-style string literal
let string = none_of::<_, _, extra::Err<Simple<char>>>('"')
.and_is(escape.not())
.or(escape)
.repeated()
.collect::<String>()
.padded_by(just('"'));
assert_eq!(
string.parse("\"wxyz\"").into_result().as_deref(),
Ok("wxyz"),
);
assert_eq!(
string.parse("\"a\nb\"").into_result().as_deref(),
Ok("a\nb"),
);
sourcefn delimited_by<U, V, B, C>(
self,
start: B,
end: C
) -> DelimitedBy<Self, B, C, U, V>
fn delimited_by<U, V, B, C>( self, start: B, end: C ) -> DelimitedBy<Self, B, C, U, V>
Parse the pattern surrounded by the given delimiters.
The output type of this parser is O
, the same as the original parser.
Examples
// A LISP-style S-expression
#[derive(Debug, PartialEq)]
enum SExpr {
Ident(String),
Num(u64),
List(Vec<SExpr>),
}
let ident = any::<_, extra::Err<Simple<char>>>().filter(|c: &char| c.is_alphabetic())
.repeated()
.at_least(1)
.collect::<String>();
let num = text::int(10)
.from_str()
.unwrapped();
let s_expr = recursive(|s_expr| s_expr
.padded()
.repeated()
.collect::<Vec<_>>()
.map(SExpr::List)
.delimited_by(just('('), just(')'))
.or(ident.map(SExpr::Ident))
.or(num.map(SExpr::Num)));
// A valid input
assert_eq!(
s_expr.parse("(add (mul 42 3) 15)").into_result(),
Ok(SExpr::List(vec![
SExpr::Ident("add".to_string()),
SExpr::List(vec![
SExpr::Ident("mul".to_string()),
SExpr::Num(42),
SExpr::Num(3),
]),
SExpr::Num(15),
])),
);
sourcefn padded_by<U, B>(self, padding: B) -> PaddedBy<Self, B, U>
fn padded_by<U, B>(self, padding: B) -> PaddedBy<Self, B, U>
Parse a pattern, but with an instance of another pattern on either end, yielding the output of the inner.
The output type of this parser is O
, the same as the original parser.
Examples
let ident = text::ascii::ident::<_, _, extra::Err<Simple<char>>>()
.padded_by(just('!'));
assert_eq!(ident.parse("!hello!").into_result(), Ok("hello"));
assert!(ident.parse("hello!").has_errors());
assert!(ident.parse("!hello").has_errors());
assert!(ident.parse("hello").has_errors());
sourcefn or<B>(self, other: B) -> Or<Self, B>
fn or<B>(self, other: B) -> Or<Self, B>
Parse one thing or, on failure, another thing.
The output of both parsers must be of the same type, because either output can be produced.
If both parsers succeed, the output of the first parser is guaranteed to be prioritized over the output of the second.
If both parsers produce errors, the combinator will attempt to select from or combine the errors to produce an error that is most likely to be useful to a human attempting to understand the problem. The exact algorithm used is left unspecified, and is not part of the crate’s semver guarantees, although regressions in error quality should be reported in the issue tracker of the main repository.
Please note that long chains of Parser::or
combinators have been known to result in poor compilation times.
If you feel you are experiencing this, consider using choice
instead.
The output type of this parser is O
, the output of both parsers.
Examples
let op = just::<_, _, extra::Err<Simple<char>>>('+')
.or(just('-'))
.or(just('*'))
.or(just('/'));
assert_eq!(op.parse("+").into_result(), Ok('+'));
assert_eq!(op.parse("/").into_result(), Ok('/'));
assert!(op.parse("!").has_errors());
sourcefn or_not(self) -> OrNot<Self>where
Self: Sized,
fn or_not(self) -> OrNot<Self>where
Self: Sized,
Attempt to parse something, but only if it exists.
If parsing of the pattern is successful, the output is Some(_)
. Otherwise, the output is None
.
The output type of this parser is Option<O>
.
Examples
let word = any::<_, extra::Err<Simple<char>>>().filter(|c: &char| c.is_alphabetic())
.repeated()
.at_least(1)
.collect::<String>();
let word_or_question = word
.then(just('?').or_not());
assert_eq!(word_or_question.parse("hello?").into_result(), Ok(("hello".to_string(), Some('?'))));
assert_eq!(word_or_question.parse("wednesday").into_result(), Ok(("wednesday".to_string(), None)));
sourcefn not(self) -> Not<Self, O>where
Self: Sized,
fn not(self) -> Not<Self, O>where
Self: Sized,
Invert the result of the contained parser, failing if it succeeds and succeeding if it fails.
The output of this parser is always ()
, the unit type.
The motivating case for this is in combination with Parser::and_is
, allowing a parser
to consume something only if it isn’t also something like an escape sequence or a nested block.
Caveats:
- The error message produced by not by default will likely be fairly unhelpful - it can only tell the span that was wrong.
- If not careful, it’s fairly easy to create non-intuitive behavior due to end-of-input being a valid token for a parser to consume, and as most parsers fail at end of input, not will succeed on it.
#[derive(Debug, PartialEq)]
enum Tree<'a> {
Text(&'a str),
Group(Vec<Self>),
}
// Arbitrary text, nested in a tree with { ... } delimiters
let tree = recursive::<_, _, extra::Err<Simple<char>>, _, _>(|tree| {
let text = any()
.and_is(one_of("{}").not())
.repeated()
.at_least(1)
.to_slice()
.map(Tree::Text);
let group = tree
.repeated()
.collect()
.delimited_by(just('{'), just('}'))
.map(Tree::Group);
text.or(group)
});
assert_eq!(
tree.parse("{abcd{efg{hijk}lmn{opq}rs}tuvwxyz}").into_result(),
Ok(Tree::Group(vec![
Tree::Text("abcd"),
Tree::Group(vec![
Tree::Text("efg"),
Tree::Group(vec![
Tree::Text("hijk"),
]),
Tree::Text("lmn"),
Tree::Group(vec![
Tree::Text("opq"),
]),
Tree::Text("rs"),
]),
Tree::Text("tuvwxyz"),
])),
);
sourcefn repeated(self) -> Repeated<Self, O, I, E>where
Self: Sized,
fn repeated(self) -> Repeated<Self, O, I, E>where
Self: Sized,
Parse a pattern zero or more times (analogous to Regex’s <PAT>*).
Input is eagerly parsed. Be aware that the parser will accept no occurrences of the pattern too. Consider using
Repeated::at_least
instead if you wish to parse a minimum number of elements.
The output type of this parser is, by default, ()
. If you want to collect the items into a Container
(such as a Vec
), use IterParser::collect
.
Examples
let num = any::<_, extra::Err<Simple<char>>>()
.filter(|c: &char| c.is_ascii_digit())
.repeated()
.at_least(1)
.collect::<String>()
.from_str()
.unwrapped();
let sum = num.clone()
.foldl(just('+').ignore_then(num).repeated(), |a, b| a + b);
assert_eq!(sum.parse("2+13+4+0+5").into_result(), Ok(24));
sourcefn separated_by<U, B>(self, separator: B) -> SeparatedBy<Self, B, O, U, I, E>
fn separated_by<U, B>(self, separator: B) -> SeparatedBy<Self, B, O, U, I, E>
Parse a pattern, separated by another, any number of times.
You can use SeparatedBy::allow_leading
or SeparatedBy::allow_trailing
to allow leading or trailing
separators.
The output type of this parser can be any Container
.
Examples
let shopping = text::ascii::ident::<_, _, extra::Err<Simple<char>>>()
.padded()
.separated_by(just(','))
.collect::<Vec<_>>();
assert_eq!(shopping.parse("eggs").into_result(), Ok(vec!["eggs"]));
assert_eq!(shopping.parse("eggs, flour, milk").into_result(), Ok(vec!["eggs", "flour", "milk"]));
See SeparatedBy::allow_leading
and SeparatedBy::allow_trailing
for more examples.
sourcefn foldl<B, F, OB>(self, other: B, f: F) -> Foldl<F, Self, B, OB, E>
fn foldl<B, F, OB>(self, other: B, f: F) -> Foldl<F, Self, B, OB, E>
Left-fold the output of the parser into a single value.
The output of the original parser, of type A, is used as the initial accumulator; each item yielded by the other parser is then folded into it, left to right, using f.
The output type of this parser is A
, the same as the original parser’s output.
Examples
let int = text::int::<_, _, extra::Err<Simple<char>>>(10)
.from_str()
.unwrapped();
let sum = int
.clone()
.foldl(just('+').ignore_then(int).repeated(), |a, b| a + b);
assert_eq!(sum.parse("1+12+3+9").into_result(), Ok(25));
assert_eq!(sum.parse("6").into_result(), Ok(6));
sourcefn foldl_with<B, F, OB>(self, other: B, f: F) -> FoldlWith<F, Self, B, OB, E>
fn foldl_with<B, F, OB>(self, other: B, f: F) -> FoldlWith<F, Self, B, OB, E>
Left-fold the output of the parser into a single value, making use of the parser’s state when doing so.
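The output of the original parser, of type A, is used as the initial accumulator; each item yielded by the other parser is then folded into it, left to right, using f, which is also given access to the parser’s state.
The output type of this parser is A
, the same as the original parser’s output.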
Examples
General
let int = text::int::<_, _, extra::Full<Simple<char>, i32, ()>>(10)
.from_str()
.unwrapped();
let sum = int
.clone()
.foldl_with(just('+').ignore_then(int).repeated(), |a, b, e| (a + b) * *e.state());
let mut multiplier = 2i32;
assert_eq!(sum.parse_with_state("1+12+3+9", &mut multiplier).into_result(), Ok(134));
assert_eq!(sum.parse_with_state("6", &mut multiplier).into_result(), Ok(6));
Interning / Arena Allocation
This example assumes use of the slotmap
crate for arena allocation.
use slotmap::{new_key_type, SlotMap};
// Metadata type for node Ids for extra type safety
new_key_type! {
pub struct NodeId;
}
// AST nodes reference other nodes with `NodeId`s instead of containing boxed/owned values
#[derive(Copy, Clone, Debug, PartialEq)]
enum Expr {
Int(i32),
Add(NodeId, NodeId),
}
type NodeArena = SlotMap<NodeId, Expr>;
// Now, define our parser
let int = text::int::<&str, _, extra::Full<Simple<char>, NodeArena, ()>>(10)
.padded()
.map_with(|s, e|
// Return the ID of the new integer node
e.state().insert(Expr::Int(s.parse().unwrap()))
);
let sum = int.foldl_with(
just('+').padded().ignore_then(int).repeated(),
|a: NodeId, b: NodeId, e| {
// Inserting an item into the arena returns its ID
e.state().insert(Expr::Add(a, b))
}
);
// Test our parser
let mut arena = NodeArena::default();
let four_plus_eight = sum.parse_with_state("4 + 8", &mut arena).unwrap();
if let Expr::Add(a, b) = arena[four_plus_eight] {
assert_eq!(arena[a], Expr::Int(4));
assert_eq!(arena[b], Expr::Int(8));
} else {
panic!("Not an Expr::Add");
}
sourcefn rewind(self) -> Rewind<Self>where
Self: Sized,
fn rewind(self) -> Rewind<Self>where
Self: Sized,
Parse a pattern. Afterwards, the input stream will be rewound to its original state, as if parsing had not occurred.
This combinator is useful for cases in which you wish to avoid a parser accidentally consuming too much input, causing later parsers to fail as a result. A typical use-case of this is that you want to parse something that is not followed by something else.
The output type of this parser is O
, the same as the original parser.
Examples
let just_numbers = text::digits::<_, _, extra::Err<Simple<char>>>(10)
.to_slice()
.padded()
.then_ignore(none_of("+-*/").rewind())
.separated_by(just(','))
.collect::<Vec<_>>();
// 3 is not parsed because it's followed by '+'.
assert_eq!(just_numbers.lazy().parse("1, 2, 3 + 4").into_result(), Ok(vec!["1", "2"]));
sourcefn lazy(self) -> Lazy<'a, Self, I, E>where
Self: Sized,
I: ValueInput<'a>,
fn lazy(self) -> Lazy<'a, Self, I, E>where
Self: Sized,
I: ValueInput<'a>,
Make the parser lazy, such that it parses as much as it validly can and then finishes successfully, leaving trailing input untouched.
The output type of this parser is O
, the same as the original parser.
Examples
let digits = one_of::<_, _, extra::Err<Simple<char>>>('0'..='9')
.repeated()
.collect::<String>()
.lazy();
assert_eq!(digits.parse("12345abcde").into_result().as_deref(), Ok("12345"));
sourcefn padded(self) -> Padded<Self>
fn padded(self) -> Padded<Self>
Parse a pattern, ignoring any amount of whitespace both before and after the pattern.
The output type of this parser is O
, the same as the original parser.
Examples
let ident = text::ascii::ident::<_, _, extra::Err<Simple<char>>>().padded();
// A pattern with no whitespace surrounding it is accepted
assert_eq!(ident.parse("hello").into_result(), Ok("hello"));
// A pattern with arbitrary whitespace surrounding it is also accepted
assert_eq!(ident.parse(" \t \n \t world \t ").into_result(), Ok("world"));
sourcefn recover_with<S: Strategy<'a, I, O, E>>(
self,
strategy: S
) -> RecoverWith<Self, S>where
Self: Sized,
fn recover_with<S: Strategy<'a, I, O, E>>(
self,
strategy: S
) -> RecoverWith<Self, S>where
Self: Sized,
Apply a fallback recovery strategy to this parser should it fail.
There is no silver bullet for error recovery, so this function allows you to specify one of several different strategies at the location of your choice. Prefer an error recovery strategy that more precisely mirrors valid syntax where possible to make error recovery more reliable.
Because chumsky is a PEG parser, which always takes the first successful parsing route through a grammar, recovering from an error may cause the parser to erroneously miss alternative valid routes through the grammar that do not generate recoverable errors. If you run into cases where valid syntax fails to parse without errors, this might be happening: consider removing error recovery or switching to a more specific error recovery strategy.
The output type of this parser is O
, the same as the original parser.
Examples
#[derive(Debug, PartialEq)]
enum Expr<'a> {
Error,
Int(&'a str),
List(Vec<Expr<'a>>),
}
let recovery = just::<_, _, extra::Err<Simple<char>>>('[')
.then(none_of(']').repeated().then(just(']')));
let expr = recursive::<_, _, extra::Err<Simple<char>>, _, _>(|expr| expr
.separated_by(just(','))
.collect::<Vec<_>>()
.delimited_by(just('['), just(']'))
.map(Expr::List)
// If parsing a list expression fails, recover at the next delimiter, generating an error AST node
.recover_with(via_parser(recovery.map(|_| Expr::Error)))
.or(text::int(10).map(Expr::Int))
.padded());
assert!(expr.parse("five").has_errors()); // Text is not a valid expression in this language...
assert_eq!(
expr.parse("[1, 2, 3]").into_result(),
Ok(Expr::List(vec![Expr::Int("1"), Expr::Int("2"), Expr::Int("3")])),
); // ...but lists and numbers are!
// This input has two syntax errors...
let res = expr.parse("[[1, two], [3, four]]");
// ...and error recovery allows us to catch both of them!
assert_eq!(res.errors().len(), 2);
// Additionally, the AST we get back still has useful information.
assert_eq!(res.output(), Some(&Expr::List(vec![Expr::Error, Expr::Error])));
sourcefn map_err<F>(self, f: F) -> MapErr<Self, F>
fn map_err<F>(self, f: F) -> MapErr<Self, F>
Map the primary error of this parser to another value.
This function is most useful when using a custom error type, allowing you to augment errors according to context.
The output type of this parser is O
, the same as the original parser.
sourcefn map_err_with_state<F>(self, f: F) -> MapErrWithState<Self, F>
fn map_err_with_state<F>(self, f: F) -> MapErrWithState<Self, F>
Map the primary error of this parser to another value, making use of the parser state.
This function is useful for augmenting errors to allow them to include context in non context-free languages, or provide contextual notes on possible causes.
The output type of this parser is O
, the same as the original parser.
sourcefn validate<U, F>(self, f: F) -> Validate<Self, O, F>
fn validate<U, F>(self, f: F) -> Validate<Self, O, F>
Validate an output, producing non-terminal errors if it does not fulfill certain criteria. The errors will not immediately halt parsing on this path; instead, parsing will continue, potentially emitting one or more other errors, only failing after the pattern has otherwise parsed successfully or a terminal error has been emitted.
This function also permits mapping the output to a value of another type, similar to Parser::map
.
If you wish parsing of this pattern to halt when an error is generated instead of continuing, consider using
Parser::try_map
instead.
The output type of this parser is U
, the result of the validation closure.
Examples
let large_int = text::int::<_, _, extra::Err<Rich<char>>>(10)
.from_str()
.unwrapped()
.validate(|x: u32, e, emitter| {
if x < 256 { emitter.emit(Rich::custom(e.span(), format!("{} must be 256 or higher.", x))) }
x
});
assert_eq!(large_int.parse("537").into_result(), Ok(537));
assert!(large_int.parse("243").into_result().is_err());
To show the difference in behavior from Parser::try_map
:
// start with the same large_int validator
let large_int_val = text::int::<_, _, extra::Err<Rich<char>>>(10)
.from_str()
.unwrapped()
.validate(|x: u32, e, emitter| {
if x < 256 { emitter.emit(Rich::custom(e.span(), format!("{} must be 256 or higher", x))) }
x
});
// A try_map version of the same parser
let large_int_tm = text::int::<_, _, extra::Err<Rich<char>>>(10)
.from_str()
.unwrapped()
.try_map(|x: u32, span| {
if x < 256 {
Err(Rich::custom(span, format!("{} must be 256 or higher", x)))
} else {
Ok(x)
}
});
// Parser that uses the validation version
let multi_step_val = large_int_val.then(text::ascii::ident().padded());
// Parser that uses the try_map version
let multi_step_tm = large_int_tm.then(text::ascii::ident().padded());
// On success, both parsers are equivalent
assert_eq!(
multi_step_val.parse("512 foo").into_result(),
Ok((512, "foo"))
);
assert_eq!(
multi_step_tm.parse("512 foo").into_result(),
Ok((512, "foo"))
);
// However, on failure, they may produce different errors:
assert_eq!(
multi_step_val.parse("100 2").into_result(),
Err(vec![
Rich::<char>::custom((0..3).into(), "100 must be 256 or higher"),
<Rich<char> as Error<&str>>::expected_found([], Some(MaybeRef::Val('2')), (4..5).into()),
])
);
assert_eq!(
multi_step_tm.parse("100 2").into_result(),
Err(vec![Rich::<char>::custom((0..3).into(), "100 must be 256 or higher")])
);
As is seen in the above example, validation doesn’t prevent the emission of later errors in the same parser, but still produces an error in the output.
sourcefn from_str<U>(self) -> Map<Self, O, fn(_: O) -> Result<U, U::Err>>
fn from_str<U>(self) -> Map<Self, O, fn(_: O) -> Result<U, U::Err>>
Attempt to convert the output of this parser into something else using Rust’s FromStr
trait.
This is most useful when wanting to convert literal values into their corresponding Rust type, such as when parsing integers.
The output type of this parser is Result<U, U::Err>
, the result of attempting to parse the output, O
, into
the value U
.
Examples
let uint64 = text::int::<_, _, extra::Err<Simple<char>>>(10)
.from_str::<u64>()
.unwrapped();
assert_eq!(uint64.parse("7").into_result(), Ok(7));
assert_eq!(uint64.parse("42").into_result(), Ok(42));
sourcefn unwrapped(self) -> Unwrapped<Self, O>where
Self: Sized,
fn unwrapped(self) -> Unwrapped<Self, O>where
Self: Sized,
For parsers that produce a Result
as their output, unwrap the result (panicking if an Err
is
encountered).
In general, this method should be avoided except in cases where all possible outputs that the parser might produce can
be parsed using FromStr
without producing an error.
This combinator is not named unwrap
to avoid confusion: it unwraps during parsing, not immediately.
The output type of this parser is U
, the Ok
value of the Result
.
Examples
let boolean = just::<_, _, extra::Err<Simple<char>>>("true")
.or(just("false"))
.from_str::<bool>()
.unwrapped(); // Cannot panic: the only possible outputs generated by the parser are "true" or "false"
assert_eq!(boolean.parse("true").into_result(), Ok(true));
assert_eq!(boolean.parse("false").into_result(), Ok(false));
// Does not panic, because the original parser only accepts "true" or "false"
assert!(boolean.parse("42").has_errors());
sourcefn into_iter(self) -> IntoIter<Self, O>where
Self: Sized,
O: IntoIterator,
fn into_iter(self) -> IntoIter<Self, O>where
Self: Sized,
O: IntoIterator,
Turn this Parser
into an IterParser
if its output type implements IntoIterator
.
The resulting iterable parser will emit each element of the output type in turn.
This is broadly analogous to functions like Vec::into_iter
, but operating at the level of parser outputs.
sourcefn boxed<'b>(self) -> Boxed<'a, 'b, I, O, E>
fn boxed<'b>(self) -> Boxed<'a, 'b, I, O, E>
Box the parser, yielding a parser that performs parsing through dynamic dispatch.
Boxing a parser might be useful for:
- Dynamically building up parsers at run-time
- Improving compilation times (Rust can struggle to compile code containing very long types)
- Passing a parser over an FFI boundary
- Getting around compiler implementation problems with long types such as this.
- Places where you need to name the type of a parser
Boxing a parser is broadly equivalent to boxing other combinators via dynamic dispatch, such as Iterator
.
The output type of this parser is O
, the same as the original parser.
Examples
When not using boxed
, the following patterns are either impossible or very difficult to express:
pub trait Parseable: Sized {
type Parser<'a>: Parser<'a, &'a str, Self>;
fn parser<'a>() -> Self::Parser<'a>;
}
impl Parseable for i32 {
// We *can* write this type, but it will be very impractical, and change on any alterations
// to the implementation
type Parser<'a> = ???;
fn parser<'a>() -> Self::Parser<'a> {
todo()
}
}
let user_input = user_input();
let mut parser = just('a');
for i in user_input {
// Doesn't work due to type mismatch - since every combinator creates a unique type
parser = parser.or(i);
}
let parser = parser.then(just('z'));
let _ = parser.parse("b").into_result();
However, with boxed
, we can express them by making the parsers all share a common type:
use chumsky::prelude::*;
pub trait Parseable: Sized {
fn parser<'a>() -> Boxed<'a, 'a, &'a str, Self, extra::Default>;
}
impl Parseable for i32 {
fn parser<'a>() -> Boxed<'a, 'a, &'a str, Self, extra::Default> {
todo().boxed()
}
}
let user_input = user_input();
let mut parser = just('a').boxed();
for i in user_input {
// Works now: boxing after each step gives every iteration the same parser type
parser = parser.or(i).boxed();
}
let parser = parser.then(just('z'));
parser.parse("az").into_result().unwrap();
sourcefn pratt<Ops>(self, ops: Ops) -> Pratt<Self, Ops>where
Self: Sized,
fn pratt<Ops>(self, ops: Ops) -> Pratt<Self, Ops>where
Self: Sized,
Available on crate feature pratt only.
Use Pratt parsing to ergonomically parse this pattern separated by prefix, postfix, and infix operators of various associativities and precedence.
Pratt parsing is a powerful technique and is recommended when writing parsers for expressions.
Example
See the documentation in pratt
for more extensive examples and details.
use chumsky::pratt::*;
use std::ops::{Neg, Mul, Div, Add, Sub};
let int = text::int::<_, _, extra::Err<Rich<char>>>(10)
.from_str()
.unwrapped()
.padded();
let op = |c| just(c).padded();
let expr = int.pratt((
prefix(2, op('-'), i64::neg),
infix(left(1), op('*'), i64::mul),
infix(left(1), op('/'), i64::div),
infix(left(0), op('+'), i64::add),
infix(left(0), op('-'), i64::sub),
));
// Pratt parsing can handle unary operators...
assert_eq!(expr.parse("-7").into_result(), Ok(-7));
// ...and infix binary operators...
assert_eq!(expr.parse("6 + 3").into_result(), Ok(9));
// ...and arbitrary precedence levels between them.
assert_eq!(expr.parse("2 + 3 * -4").into_result(), Ok(-10));