Skip to main content

winnow/_tutorial/
chapter_2.rs

1//! # Chapter 2: Tokens and Tags
2//!
3//! The simplest *useful* parser you can write is one which matches tokens.
4//! In our case, tokens are `char`.
5//!
6//! ## Tokens
7//!
8//! [`Stream`] provides some core operations to help with parsing. For example, to process a
9//! single token, you can do:
10//! ```rust
11//! # use winnow::Parser;
12//! # use winnow::Result;
13//! use winnow::stream::Stream;
14//! use winnow::error::ParserError;
15//!
16//! fn parse_prefix(input: &mut &str) -> Result<char> {
17//!     let c = input.next_token().ok_or_else(|| {
18//!         ParserError::from_input(input)
19//!     })?;
20//!     if c != '0' {
21//!         return Err(ParserError::from_input(input));
22//!     }
23//!     Ok(c)
24//! }
25//!
26//! fn main()  {
27//!     let mut input = "0x1a2b Hello";
28//!
29//!     let output = parse_prefix.parse_next(&mut input).unwrap();
30//!
31//!     assert_eq!(input, "x1a2b Hello");
32//!     assert_eq!(output, '0');
33//!
34//!     assert!(parse_prefix.parse_next(&mut "d").is_err());
35//! }
36//! ```
37//!
38//! This extraction of a token is encapsulated in the [`any`] parser:
39//! ```rust
40//! # use winnow::Result;
41//! # use winnow::error::ParserError;
42//! use winnow::Parser;
43//! use winnow::token::any;
44//!
45//! fn parse_prefix(input: &mut &str) -> Result<char> {
46//!     let c = any
47//!         .parse_next(input)?;
48//!     if c != '0' {
49//!         return Err(ParserError::from_input(input));
50//!     }
51//!     Ok(c)
52//! }
53//! #
54//! # fn main()  {
55//! #     let mut input = "0x1a2b Hello";
56//! #
57//! #     let output = parse_prefix.parse_next(&mut input).unwrap();
58//! #
59//! #     assert_eq!(input, "x1a2b Hello");
60//! #     assert_eq!(output, '0');
61//! #
62//! #     assert!(parse_prefix.parse_next(&mut "d").is_err());
63//! # }
64//! ```
65//!
66//! Using the higher level [`any`] parser opens `parse_prefix` to the helpers on the [`Parser`] trait,
67//! such as [`Parser::verify`], which fails a parse if a condition isn't met, just like our check above:
68//! ```rust
69//! # use winnow::Result;
70//! use winnow::Parser;
71//! use winnow::token::any;
72//!
73//! fn parse_prefix(input: &mut &str) -> Result<char> {
74//!     let c = any
75//!         .verify(|c| *c == '0')
76//!         .parse_next(input)?;
77//!     Ok(c)
78//! }
79//! #
80//! # fn main()  {
81//! #     let mut input = "0x1a2b Hello";
82//! #
83//! #     let output = parse_prefix.parse_next(&mut input).unwrap();
84//! #
85//! #     assert_eq!(input, "x1a2b Hello");
86//! #     assert_eq!(output, '0');
87//! #
88//! #     assert!(parse_prefix.parse_next(&mut "d").is_err());
89//! # }
90//! ```
91//!
92//! Matching a single token literal is common enough that [`Parser`] is implemented for
93//! the `char` type, encapsulating both [`any`] and [`Parser::verify`]:
94//! ```rust
95//! # use winnow::Result;
96//! use winnow::Parser;
97//!
98//! fn parse_prefix(input: &mut &str) -> Result<char> {
99//!     let c = '0'.parse_next(input)?;
100//!     Ok(c)
101//! }
102//! #
103//! # fn main()  {
104//! #     let mut input = "0x1a2b Hello";
105//! #
106//! #     let output = parse_prefix.parse_next(&mut input).unwrap();
107//! #
108//! #     assert_eq!(input, "x1a2b Hello");
109//! #     assert_eq!(output, '0');
110//! #
111//! #     assert!(parse_prefix.parse_next(&mut "d").is_err());
112//! # }
113//! ```
114//!
115//! ## Tags
116//!
117//! [`Stream`] also supports processing slices of tokens:
118//! ```rust
119//! # use winnow::Parser;
120//! # use winnow::Result;
121//! use winnow::stream::Stream;
122//! use winnow::error::ParserError;
123//!
124//! fn parse_prefix<'s>(input: &mut &'s str) -> Result<&'s str> {
125//!     let expected = "0x";
126//!     if input.len() < expected.len() {
127//!         return Err(ParserError::from_input(input));
128//!     }
129//!     let actual = input.next_slice(expected.len());
130//!     if actual != expected {
131//!         return Err(ParserError::from_input(input));
132//!     }
133//!     Ok(actual)
134//! }
135//!
136//! fn main()  {
137//!     let mut input = "0x1a2b Hello";
138//!
139//!     let output = parse_prefix.parse_next(&mut input).unwrap();
140//!     assert_eq!(input, "1a2b Hello");
141//!     assert_eq!(output, "0x");
142//!
143//!     assert!(parse_prefix.parse_next(&mut "0o123").is_err());
144//! }
145//! ```
146//!
147//! Matching the input position against a string literal is encapsulated in the [`literal`] parser:
148//! ```rust
149//! # use winnow::Result;
150//! # use winnow::Parser;
151//! use winnow::token::literal;
152//!
153//! fn parse_prefix<'s>(input: &mut &'s str) -> Result<&'s str> {
154//!     let expected = "0x";
155//!     let actual = literal(expected).parse_next(input)?;
156//!     Ok(actual)
157//! }
158//! #
159//! # fn main()  {
160//! #     let mut input = "0x1a2b Hello";
161//! #
162//! #     let output = parse_prefix.parse_next(&mut input).unwrap();
163//! #     assert_eq!(input, "1a2b Hello");
164//! #     assert_eq!(output, "0x");
165//! #
166//! #     assert!(parse_prefix.parse_next(&mut "0o123").is_err());
167//! # }
168//! ```
169//!
170//! As with a single token, matching a string literal is common enough that [`Parser`] is implemented for the `&str` type:
171//! ```rust
172//! # use winnow::Result;
173//! use winnow::Parser;
174//!
175//! fn parse_prefix<'s>(input: &mut &'s str) -> Result<&'s str> {
176//!     let actual = "0x".parse_next(input)?;
177//!     Ok(actual)
178//! }
179//! #
180//! # fn main()  {
181//! #     let mut input = "0x1a2b Hello";
182//! #
183//! #     let output = parse_prefix.parse_next(&mut input).unwrap();
184//! #     assert_eq!(input, "1a2b Hello");
185//! #     assert_eq!(output, "0x");
186//! #
187//! #     assert!(parse_prefix.parse_next(&mut "0o123").is_err());
188//! # }
189//! ```
190//!
191//! See [`token`] for additional individual and token-slice parsers.
192//!
193//! ## Character Classes
194//!
195//! Selecting a single `char` or a [`literal`] is fairly limited. Sometimes, you will want to select one of several
196//! `char`s of a specific class, like digits. For this, we use the [`one_of`] parser:
197//!
198//! ```rust
199//! # use winnow::Parser;
200//! # use winnow::Result;
201//! use winnow::token::one_of;
202//!
203//! fn parse_digits(input: &mut &str) -> Result<char> {
204//!     one_of(('0'..='9', 'a'..='f', 'A'..='F')).parse_next(input)
205//! }
206//!
207//! fn main() {
208//!     let mut input = "1a2b Hello";
209//!
210//!     let output = parse_digits.parse_next(&mut input).unwrap();
211//!     assert_eq!(input, "a2b Hello");
212//!     assert_eq!(output, '1');
213//!
214//!     assert!(parse_digits.parse_next(&mut "Z").is_err());
215//! }
216//! ```
217//!
218//! > **Aside:** [`one_of`] might look straightforward, a function returning a value that implements `Parser`.
219//! > Let's look at it more closely as it's used above (resolving all generic parameters):
220//! > ```rust
221//! > # use winnow::prelude::*;
222//! > # use winnow::error::ContextError;
223//! > pub fn one_of<'i>(
224//! >     list: &'static [char]
225//! > ) -> impl Parser<&'i str, char, ContextError> {
226//! >     // ...
227//! > #    winnow::token::one_of(list)
228//! > }
229//! > ```
230//! > If you have not programmed in a language where functions are values, the type signature of the
231//! > [`one_of`] function might be a surprise.
232//! > The function [`one_of`] *returns a function*. The function it returns is a
233//! > [`Parser`], taking a `&str` and returning a [`Result`]. This is a common pattern in winnow for
234//! > configurable or stateful parsers.
235//!
236//! Some character classes are common enough that a named parser is provided, such as:
237//! - [`line_ending`][crate::ascii::line_ending]: Recognizes an end of line (both `\n` and `\r\n`)
238//! - [`newline`][crate::ascii::newline]: Matches a newline character `\n`
239//! - [`tab`][crate::ascii::tab]: Matches a tab character `\t`
240//!
241//! You can then capture sequences of these characters with parsers like [`take_while`].
242//! ```rust
243//! # use winnow::Parser;
244//! # use winnow::Result;
245//! use winnow::token::take_while;
246//!
247//! fn parse_digits<'s>(input: &mut &'s str) -> Result<&'s str> {
248//!     take_while(1.., ('0'..='9', 'a'..='f', 'A'..='F')).parse_next(input)
249//! }
250//!
251//! fn main() {
252//!     let mut input = "1a2b Hello";
253//!
254//!     let output = parse_digits.parse_next(&mut input).unwrap();
255//!     assert_eq!(input, " Hello");
256//!     assert_eq!(output, "1a2b");
257//!
258//!     assert!(parse_digits.parse_next(&mut "Z").is_err());
259//! }
260//! ```
261//!
262//! We could simplify this further by using one of the built-in character classes, [`hex_digit1`]:
263//! ```rust
264//! # use winnow::Parser;
265//! # use winnow::Result;
266//! use winnow::ascii::hex_digit1;
267//!
268//! fn parse_digits<'s>(input: &mut &'s str) -> Result<&'s str> {
269//!     hex_digit1.parse_next(input)
270//! }
271//!
272//! fn main() {
273//!     let mut input = "1a2b Hello";
274//!
275//!     let output = parse_digits.parse_next(&mut input).unwrap();
276//!     assert_eq!(input, " Hello");
277//!     assert_eq!(output, "1a2b");
278//!
279//!     assert!(parse_digits.parse_next(&mut "Z").is_err());
280//! }
281//! ```
282//!
283//! See [`ascii`] for more text-based parsers.
284
285#![allow(unused_imports)]
286use crate::ascii;
287use crate::ascii::hex_digit1;
288use crate::stream::ContainsToken;
289use crate::stream::Stream;
290use crate::token;
291use crate::token::any;
292use crate::token::literal;
293use crate::token::one_of;
294use crate::token::take_while;
295use crate::Parser;
296use crate::Result;
297use std::ops::RangeInclusive;
298
299pub use super::chapter_1 as previous;
300pub use super::chapter_3 as next;
301pub use crate::_tutorial as table_of_contents;