Trait chumsky::Parser[][src]

pub trait Parser<I: Clone, O> {
    type Error: Error<I>;
Show 31 methods fn parse_inner<D: Debugger>(
        &self,
        debugger: &mut D,
        stream: &mut Stream<'_, I, <Self::Error as Error<I>>::Span>
    ) -> (Vec<Located<I, Self::Error>>, Result<(O, Option<Located<I, Self::Error>>), Located<I, Self::Error>>)
    where
        Self: Sized
;
fn parse_inner_verbose(
        &self,
        d: &mut Verbose,
        s: &mut Stream<'_, I, <Self::Error as Error<I>>::Span>
    ) -> (Vec<Located<I, Self::Error>>, Result<(O, Option<Located<I, Self::Error>>), Located<I, Self::Error>>);
fn parse_inner_silent(
        &self,
        d: &mut Silent,
        s: &mut Stream<'_, I, <Self::Error as Error<I>>::Span>
    ) -> (Vec<Located<I, Self::Error>>, Result<(O, Option<Located<I, Self::Error>>), Located<I, Self::Error>>); fn parse_recovery_inner<'a, D: Debugger, Iter: Iterator<Item = (I, <Self::Error as Error<I>>::Span)> + 'a, S: Into<Stream<'a, I, <Self::Error as Error<I>>::Span, Iter>>>(
        &self,
        debugger: &mut D,
        stream: S
    ) -> (Option<O>, Vec<Self::Error>)
    where
        Self: Sized
, { ... }
fn parse_recovery<'a, Iter: Iterator<Item = (I, <Self::Error as Error<I>>::Span)> + 'a, S: Into<Stream<'a, I, <Self::Error as Error<I>>::Span, Iter>>>(
        &self,
        stream: S
    ) -> (Option<O>, Vec<Self::Error>)
    where
        Self: Sized
, { ... }
fn parse_debug<'a, Iter: Iterator<Item = (I, <Self::Error as Error<I>>::Span)> + 'a, S: Into<Stream<'a, I, <Self::Error as Error<I>>::Span, Iter>>>(
        &self,
        stream: S
    ) -> (Option<O>, Vec<Self::Error>)
    where
        Self: Sized
, { ... }
fn parse<'a, Iter: Iterator<Item = (I, <Self::Error as Error<I>>::Span)> + 'a, S: Into<Stream<'a, I, <Self::Error as Error<I>>::Span, Iter>>>(
        &self,
        stream: S
    ) -> Result<O, Vec<Self::Error>>
    where
        Self: Sized
, { ... }
fn debug<T: Display + 'static>(self, x: T) -> Debug<Self>
    where
        Self: Sized
, { ... }
fn map<U, F: Fn(O) -> U>(self, f: F) -> Map<Self, F, O>
    where
        Self: Sized
, { ... }
fn map_with_span<U, F: Fn(O, <Self::Error as Error<I>>::Span) -> U>(
        self,
        f: F
    ) -> MapWithSpan<Self, F, O>
    where
        Self: Sized
, { ... }
fn map_err<F: Fn(Self::Error) -> Self::Error>(self, f: F) -> MapErr<Self, F>
    where
        Self: Sized
, { ... }
fn try_map<U, F: Fn(O) -> Result<U, Self::Error>>(
        self,
        f: F
    ) -> TryMap<Self, F, O>
    where
        Self: Sized
, { ... }
fn labelled<L: Into<<Self::Error as Error<I>>::Label> + Clone>(
        self,
        label: L
    ) -> Label<Self, L>
    where
        Self: Sized
, { ... }
fn to<U: Clone>(self, x: U) -> To<Self, O, U>
    where
        Self: Sized
, { ... }
fn foldl<A, B, F: Fn(A, B) -> A>(self, f: F) -> Foldl<Self, F, A, B>
    where
        Self: Parser<I, (A, Vec<B>)> + Sized
, { ... }
fn foldr<'a, A, B, F: Fn(A, B) -> B + 'a>(
        self,
        f: F
    ) -> Foldr<Self, F, A, B>
    where
        Self: Parser<I, (Vec<A>, B)> + Sized
, { ... }
fn ignored(self) -> Ignored<Self, O>
    where
        Self: Sized
, { ... }
fn collect<C: FromIterator<O::Item>>(self) -> Map<Self, fn(_: O) -> C, O>
    where
        Self: Sized,
        O: IntoIterator
, { ... }
fn then<U, P: Parser<I, U>>(self, other: P) -> Then<Self, P>
    where
        Self: Sized
, { ... }
fn chain<T, U, P: Parser<I, U, Error = Self::Error>>(
        self,
        other: P
    ) -> Map<Then<Self, P>, fn(_: (O, U)) -> Vec<T>, (O, U)>
    where
        Self: Sized,
        U: Chain<T>,
        O: Chain<T>
, { ... }
fn flatten<T, Inner>(self) -> Map<Self, fn(_: O) -> Vec<T>, O>
    where
        Self: Sized,
        O: IntoIterator<Item = Inner>,
        Inner: IntoIterator<Item = T>
, { ... }
fn ignore_then<U, P: Parser<I, U>>(
        self,
        other: P
    ) -> IgnoreThen<Self, P, O, U>
    where
        Self: Sized
, { ... }
fn then_ignore<U, P: Parser<I, U>>(
        self,
        other: P
    ) -> ThenIgnore<Self, P, O, U>
    where
        Self: Sized
, { ... }
fn padded_by<U, P: Parser<I, U, Error = Self::Error> + Clone>(
        self,
        other: P
    ) -> ThenIgnore<IgnoreThen<P, Self, U, O>, P, O, U>
    where
        Self: Sized
, { ... }
fn delimited_by(self, start: I, end: I) -> DelimitedBy<Self, I>
    where
        Self: Sized
, { ... }
fn or<P: Parser<I, O>>(self, other: P) -> Or<Self, P>
    where
        Self: Sized
, { ... }
fn recover_with<S: Strategy<I, O>>(self, strategy: S) -> Recovery<Self, S>
    where
        Self: Sized
, { ... }
fn or_not(self) -> OrNot<Self>
    where
        Self: Sized
, { ... }
fn repeated(self) -> Repeated<Self>
    where
        Self: Sized
, { ... }
fn separated_by<U, P: Parser<I, U>>(
        self,
        other: P
    ) -> SeparatedBy<Self, P, U>
    where
        Self: Sized
, { ... }
fn boxed<'a>(self) -> BoxedParser<'a, I, O, Self::Error>
    where
        Self: Sized + 'a
, { ... }
}
Expand description

A trait implemented by parsers.

Parsers take a stream of tokens of type I and attempt to parse them into a value of type O. In doing so, they may encounter errors. These need not be fatal to the parsing process: syntactic errors can be recovered from and a valid output may still be generated alongside any syntax errors that were encountered along the way. Usually, this output comes in the form of an Abstract Syntax Tree (AST).

Parsers currently only support LL(1) grammars. More concretely, this means that the rules that compose this parser are only permitted to ‘look’ a single token into the future to determine the path through the grammar rules to be taken by the parser. Unlike other techniques, such as recursive descent, arbitrary backtracking is not permitted. The reasons for this are numerous, but perhaps the most obvious is that it makes error detection and recovery significantly simpler and easier. In the future, this crate may be extended to support more complex grammars.

LL(1) parsers by themselves are not particularly powerful. Indeed, even very old languages such as C cannot be parsed by an LL(1) parser in a single pass. However, this limitation quickly vanishes (and, indeed, makes the design of both the language and the parser easier) when one introduces multiple passes. For example, C compilers generally have a lexical pass prior to the main parser that groups the input characters into tokens.

Associated Types

The type of errors emitted by this parser.

Required methods

👎 Deprecated:

This method is excluded from the semver guarantees of chumsky. If you decide to use it, broken builds are your fault.

Parse a stream with all the bells & whistles. You can use this to implement your own parser combinators. Note that both the signature and semantic requirements of this function are very likely to change in later versions. Where possible, prefer more ergonomic combinators provided elsewhere in the crate rather than implementing your own.

Provided methods

Parse a stream of tokens, yielding an output if possible, and any errors encountered along the way.

If you don’t care about producing an output if errors are encountered, use Parser::parse instead.

Parse a stream of tokens, yielding an output if possible, and any errors encountered along the way.

If you don’t care about producing an output if errors are encountered, use Parser::parse instead.

Parse a stream of tokens, yielding an output if possible, and any errors encountered along the way. Unlike Parser::parse_recovery, this function will produce debugging output as it executes.

If you don’t care about producing an output if errors are encountered, use Parser::parse instead.

Parse a stream of tokens, yielding an output or any errors that were encountered along the way.

If you wish to attempt to produce an output even if errors are encountered, use Parser::parse_recovery.

Map the output of this parser to another value.

Examples

#[derive(Debug, PartialEq)]
enum Token { Word(String), Num(u64) }

let word = filter::<_, _, Cheap<char>>(|c: &char| c.is_alphabetic())
    .repeated().at_least(1)
    .collect::<String>()
    .map(Token::Word);

let num = filter::<_, _, Cheap<char>>(|c: &char| c.is_ascii_digit())
    .repeated().at_least(1)
    .collect::<String>()
    .map(|s| Token::Num(s.parse().unwrap()));

let token = word.or(num);

assert_eq!(token.parse("test"), Ok(Token::Word("test".to_string())));
assert_eq!(token.parse("42"), Ok(Token::Num(42)));

Map the output of this parser to another value, making use of the pattern’s span.

Map the primary error of this parser to another value.

After a successful parse, apply a fallible function to the output. If the function produces an error, treat it as a parsing error.

Label the pattern parsed by this parser for more useful error messages.

This is useful when you want to give users a more useful description of an expected pattern than simply a list of possible initial tokens. For example, it’s common to use the term “expression” as a catch-all for a number of different constructs in many languages.

This does not label recovered errors generated by sub-patterns within the parser, only errors directly emitted by the parser.

This does not label errors where the labelled pattern consumed input (i.e., in unambiguous cases).

Examples

let frac = text::digits(10)
    .chain(just('.'))
    .chain::<char, _, _>(text::digits(10))
    .collect::<String>()
    .then_ignore(end())
    .labelled("number");

assert_eq!(frac.parse("42.3"), Ok("42.3".to_string()));
assert_eq!(frac.parse("hello"), Err(vec![Cheap::expected_input_found(0..1, None, Some('h')).with_label("number")]));
assert_eq!(frac.parse("42!"), Err(vec![Cheap::expected_input_found(2..3, Some('.'), Some('!')).with_label("number")]));

Transform all outputs of this parser to a predetermined value.

Examples

#[derive(Clone, Debug, PartialEq)]
enum Op { Add, Sub, Mul, Div }

let op = just::<_, Cheap<char>>('+').to(Op::Add)
    .or(just('-').to(Op::Sub))
    .or(just('*').to(Op::Mul))
    .or(just('/').to(Op::Div));

assert_eq!(op.parse("+"), Ok(Op::Add));
assert_eq!(op.parse("/"), Ok(Op::Div));

Left-fold the output of the parser into a single value, where the output is of type (_, Vec<_>).

Examples

let int = text::int::<char, Cheap<char>>(10)
    .collect::<String>()
    .map(|s| s.parse().unwrap());

let sum = int
    .then(just('+').ignore_then(int).repeated())
    .foldl(|a, b| a + b);

assert_eq!(sum.parse("1+12+3+9"), Ok(25));
assert_eq!(sum.parse("6"), Ok(6));

Right-fold the output of the parser into a single value, where the output is of type (Vec<_>, _).

Examples

let int = text::int::<char, Cheap<char>>(10)
    .collect::<String>()
    .map(|s| s.parse().unwrap());

let signed = just('+').to(1)
    .or(just('-').to(-1))
    .repeated()
    .then(int)
    .foldr(|a, b| a * b);

assert_eq!(signed.parse("3"), Ok(3));
assert_eq!(signed.parse("-17"), Ok(-17));
assert_eq!(signed.parse("--+-+-5"), Ok(5));

Ignore the output of this parser, yielding () as an output instead.

This can be used to reduce the cost of parsing by avoiding unnecessary allocations (most collections containing ZSTs do not allocate). For example, it’s common to want to ignore whitespace in many grammars.

Examples

// A parser that parses any number of whitespace characters without allocating
let whitespace = filter::<_, _, Cheap<char>>(|c: &char| c.is_whitespace())
    .ignored()
    .repeated();

assert_eq!(whitespace.parse("    "), Ok(vec![(); 4]));
assert_eq!(whitespace.parse("  hello"), Ok(vec![(); 2]));

Collect the output of this parser into a collection.

This is commonly useful for collecting Vec<char> outputs into Strings.

Examples

let word = filter::<_, _, Cheap<char>>(|c: &char| c.is_alphabetic())
    .repeated()
    .collect::<String>();

assert_eq!(word.parse("hello"), Ok("hello".to_string()));

Parse one thing and then another thing, yielding a tuple of the two outputs.

Examples

let word = filter::<_, _, Cheap<char>>(|c: &char| c.is_alphabetic())
    .repeated().at_least(1)
    .collect::<String>();
let two_words = word.then_ignore(just(' ')).then(word);

assert_eq!(two_words.parse("dog cat"), Ok(("dog".to_string(), "cat".to_string())));
assert!(two_words.parse("hedgehog").is_err());

Parse one thing and then another thing, attempting to chain the two outputs into a Vec.

Examples

let int = just('-').or_not()
    .chain(filter::<_, _, Cheap<char>>(|c: &char| c.is_ascii_digit() && *c != '0')
        .chain(filter::<_, _, Cheap<char>>(|c: &char| c.is_ascii_digit()).repeated()))
    .or(just('0').map(|c| vec![c]))
    .then_ignore(end())
    .collect::<String>()
    .map(|s| s.parse().unwrap());

assert_eq!(int.parse("0"), Ok(0));
assert_eq!(int.parse("415"), Ok(415));
assert_eq!(int.parse("-50"), Ok(-50));
assert!(int.parse("-0").is_err());
assert!(int.parse("05").is_err());

Flatten a nested collection.

Parse one thing and then another thing, yielding only the output of the latter.

Examples

let zeroes = filter::<_, _, Cheap<char>>(|c: &char| *c == '0').ignored().repeated();
let digits = filter(|c: &char| c.is_ascii_digit()).repeated();
let integer = zeroes
    .ignore_then(digits)
    .collect::<String>()
    .map(|s| s.parse().unwrap());

assert_eq!(integer.parse("00064"), Ok(64));
assert_eq!(integer.parse("32"), Ok(32));

Parse one thing and then another thing, yielding only the output of the former.

Examples

let word = filter::<_, _, Cheap<char>>(|c: &char| c.is_alphabetic())
    .repeated().at_least(1)
    .collect::<String>();

let punctuated = word
    .then_ignore(just('!').or(just('?')).or_not());

let sentence = punctuated
    .padded() // Allow for whitespace gaps
    .repeated();

assert_eq!(
    sentence.parse("hello! how are you?"),
    Ok(vec![
        "hello".to_string(),
        "how".to_string(),
        "are".to_string(),
        "you".to_string(),
    ]),
);

Parse the pattern surrounded by the given delimiters.

Examples

// A LISP-style S-expression
#[derive(Debug, PartialEq)]
enum SExpr {
    Ident(String),
    Num(u64),
    List(Vec<SExpr>),
}

let ident = filter::<_, _, Cheap<char>>(|c: &char| c.is_alphabetic())
    .repeated().at_least(1)
    .collect::<String>();

let num = text::int(10)
    .collect::<String>()
    .map(|s| s.parse().unwrap());

let s_expr = recursive(|s_expr| s_expr
    .padded()
    .repeated()
    .map(SExpr::List)
    .delimited_by('(', ')')
    .or(ident.map(SExpr::Ident))
    .or(num.map(SExpr::Num)));

// A valid input
assert_eq!(
    s_expr.parse_recovery("(add (mul 42 3) 15)"),
    (
        Some(SExpr::List(vec![
            SExpr::Ident("add".to_string()),
            SExpr::List(vec![
                SExpr::Ident("mul".to_string()),
                SExpr::Num(42),
                SExpr::Num(3),
            ]),
            SExpr::Num(15),
        ])),
        Vec::new(), // No errors!
    ),
);

Parse one thing or, on failure, another thing.

Examples

let op = just::<_, Cheap<char>>('+')
    .or(just('-'))
    .or(just('*'))
    .or(just('/'));

assert_eq!(op.parse("+"), Ok('+'));
assert_eq!(op.parse("/"), Ok('/'));
assert!(op.parse("!").is_err());

Apply a fallback recovery strategy to this parser should it fail.

There is no silver bullet for error recovery, so this function allows you to specify one of several different strategies at the location of your choice.

Note that for implementation reasons, adding an error recovery strategy can cause a parser to ‘over-commit’, missing potentially valid alternative parse routes (TODO: document this and explain why and how it happens). Rest assured that this case is generally quite rare and only happens for very loose, almost-ambiguous syntax. If you run into cases that you believe should parse but do not, try removing or moving recovery strategies to fix the problem.

Attempt to parse something, but only if it exists.

If parsing of the pattern is successful, the output is Some(_). Otherwise, the output is None.

Examples

let word = filter::<_, _, Cheap<char>>(|c: &char| c.is_alphabetic())
    .repeated().at_least(1)
    .collect::<String>();

let word_or_question = word
    .then(just('?').or_not());

assert_eq!(word_or_question.parse("hello?"), Ok(("hello".to_string(), Some('?'))));
assert_eq!(word_or_question.parse("wednesday"), Ok(("wednesday".to_string(), None)));

Parse an expression any number of times (including zero times).

Input is eagerly parsed. Be aware that the parser will accept no occurrences of the pattern too. Consider using Parser::repeated().at_least instead if it better suits your use-case.

Examples

let num = filter::<_, _, Cheap<char>>(|c: &char| c.is_ascii_digit())
    .repeated().at_least(1)
    .collect::<String>()
    .map(|s| s.parse().unwrap());

let sum = num.then(just('+').ignore_then(num).repeated())
    .foldl(|a, b| a + b);

assert_eq!(sum.parse("2+13+4+0+5"), Ok(24));

Parse an expression, separated by another, any number of times.

You can call .allow_leading() or .allow_trailing() on the result to permit leading and trailing separators.

Box the parser, yielding a parser that performs parsing through dynamic dispatch.

Boxing a parser might be useful for:

  • Passing a parser over an FFI boundary

  • Getting around compiler implementation problems with long types such as this.

  • Places where you need to name the type of a parser

Boxing a parser is loosely equivalent to boxing other combinators, such as Iterator.

Implementations on Foreign Types

👎 Deprecated:

This method is excluded from the semver guarantees of chumsky. If you decide to use it, broken builds are your fault.

👎 Deprecated:

This method is excluded from the semver guarantees of chumsky. If you decide to use it, broken builds are your fault.

👎 Deprecated:

This method is excluded from the semver guarantees of chumsky. If you decide to use it, broken builds are your fault.

Implementors