winnow/_topic/language.rs
1//! # Elements of Programming Languages
2//!
3//! These are short recipes for accomplishing common tasks.
4//!
5//! * [Whitespace](#whitespace)
6//! + [Wrapper combinators that eat whitespace before and after a parser](#wrapper-combinators-that-eat-whitespace-before-and-after-a-parser)
7//! * [Comments](#comments)
8//! + [`// C++/EOL-style comments`](#-ceol-style-comments)
9//! + [`/* C-style comments */`](#-c-style-comments-)
10//! * [Identifiers](#identifiers)
11//! + [`Rust-Style Identifiers`](#rust-style-identifiers)
12//! * [Literal Values](#literal-values)
13//! + [Escaped Strings](#escaped-strings)
14//! + [Integers](#integers)
15//! - [Hexadecimal](#hexadecimal)
16//! - [Octal](#octal)
17//! - [Binary](#binary)
18//! - [Decimal](#decimal)
19//! + [Floating Point Numbers](#floating-point-numbers)
20//! * [C-style Expressions](#c-style-expressions)
21//!
22//! ## Whitespace
23//!
24//!
25//!
26//! ### Wrapper combinators that eat whitespace before and after a parser
27//!
28//! ```rust
29//! use winnow::prelude::*;
30//! use winnow::{
31//! error::ParserError,
32//! combinator::delimited,
33//! ascii::multispace0,
34//! };
35//!
36//! /// A combinator that takes a parser `inner` and produces a parser that also consumes both leading and
37//! /// trailing whitespace, returning the output of `inner`.
38//! fn ws<'a, F, O, E: ParserError<&'a str>>(inner: F) -> impl Parser<&'a str, O, E>
39//! where
40//! F: Parser<&'a str, O, E>,
41//! {
42//! delimited(
43//! multispace0,
44//! inner,
45//! multispace0
46//! )
47//! }
48//! ```
49//!
50//! To eat only trailing whitespace, replace `delimited(...)` with `terminated(inner, multispace0)`.
51//! Likewise, to eat only leading whitespace, replace `delimited(...)` with `preceded(multispace0,
52//! inner)`. You can use your own parser instead of `multispace0` if you want to skip a different set
53//! of lexemes.
54//!
55//! ## Comments
56//!
57//! ### `// C++/EOL-style comments`
58//!
59//! This version uses `%` to start a comment, does not consume the newline character, and returns an
60//! output of `()`.
61//!
62//! ```rust
63//! use winnow::prelude::*;
64//! use winnow::{
65//! error::ParserError,
66//! token::take_till,
67//! };
68//!
69//! pub fn peol_comment<'a, E: ParserError<&'a str>>(i: &mut &'a str) -> ModalResult<(), E>
70//! {
71//! ('%', take_till(1.., ['\n', '\r']))
72//! .void() // Output is thrown away.
73//! .parse_next(i)
74//! }
75//! ```
76//!
77//! ### `/* C-style comments */`
78//!
79//! Inline comments surrounded with sentinel literals `(*` and `*)`. This version returns an output of `()`
80//! and does not handle nested comments.
81//!
82//! ```rust
83//! use winnow::prelude::*;
84//! use winnow::{
85//! error::ParserError,
86//! token::take_until,
87//! };
88//!
89//! pub fn pinline_comment<'a, E: ParserError<&'a str>>(i: &mut &'a str) -> ModalResult<(), E> {
90//! (
91//! "(*",
92//! take_until(0.., "*)"),
93//! "*)"
94//! )
95//! .void() // Output is thrown away.
96//! .parse_next(i)
97//! }
98//! ```
99//!
100//! ## Identifiers
101//!
102//! ### `Rust-Style Identifiers`
103//!
104//! Identifiers that may start with a letter (or underscore) and may contain underscores,
105//! letters and numbers can be parsed like this:
106//!
107//! ```rust
108//! use winnow::prelude::*;
109//! use winnow::{
110//! stream::AsChar,
111//! token::take_while,
112//! token::one_of,
113//! };
114//!
115//! pub fn identifier<'s>(input: &mut &'s str) -> ModalResult<&'s str> {
116//! (
117//! one_of(|c: char| c.is_alpha() || c == '_'),
118//! take_while(0.., |c: char| c.is_alphanum() || c == '_')
119//! )
120//! .take()
121//! .parse_next(input)
122//! }
123//! ```
124//!
125//! Let's say we apply this to the identifier `hello_world123abc`. The first element of the tuple
126//! uses [`one_of`][crate::token::one_of], which takes `h`. The tuple ensures that
127//! `ello_world123abc` will be piped to the next [`take_while`][crate::token::take_while] parser,
128//! which takes every remaining character. However, the tuple returns a tuple of the results
129//! of its sub-parsers. The [`take`][crate::Parser::take] parser produces a `&str` of the
130//! input text that was parsed, which in this case is the entire `&str` `hello_world123abc`.
131//!
132//! ## Literal Values
133//!
134//! ### Escaped Strings
135//!
136//! ```rust
137#![doc = include_str!("../../examples/string/parser.rs")]
138//! ```
139//!
140//! See also [`take_escaped`] and [`escaped`].
141//!
142//! ### Integers
143//!
144//! The following recipes all return string slices rather than integer values. How to obtain an
145//! integer value instead is demonstrated for hexadecimal integers. The others are similar.
146//!
147//! The parsers allow the grouping character `_`, which allows one to group the digits by byte, for
148//! example: `0xA4_3F_11_28`. If you prefer to exclude the `_` character, the lambda to convert from a
149//! string slice to an integer value is slightly simpler. You can also strip the `_` from the string
150//! slice that is returned, which is demonstrated in the second hexadecimal number parser.
151//!
152//! #### Hexadecimal
153//!
154//! The parser outputs the string slice of the digits without the leading `0x`/`0X`.
155//!
156//! ```rust
157//! use winnow::prelude::*;
158//! use winnow::{
159//! combinator::alt,
160//! combinator::repeat,
161//! combinator::{preceded, terminated},
162//! token::one_of,
163//! };
164//!
165//! fn hexadecimal<'s>(input: &mut &'s str) -> ModalResult<&'s str> { // <'a, E: ParserError<&'a str>>
166//! preceded(
167//! alt(("0x", "0X")),
168//! repeat(1..,
169//! terminated(one_of(('0'..='9', 'a'..='f', 'A'..='F')), repeat(0.., '_').map(|()| ()))
170//! ).map(|()| ()).take()
171//! ).parse_next(input)
172//! }
173//! ```
174//!
175//! If you want it to return the integer value instead, use `try_map`:
176//!
177//! ```rust
178//! use winnow::prelude::*;
179//! use winnow::{
180//! combinator::alt,
181//! combinator::repeat,
182//! combinator::{preceded, terminated},
183//! token::one_of,
184//! };
185//!
186//! fn hexadecimal_value(input: &mut &str) -> ModalResult<i64> {
187//! preceded(
188//! alt(("0x", "0X")),
189//! repeat(1..,
190//! terminated(one_of(('0'..='9', 'a'..='f', 'A'..='F')), repeat(0.., '_').map(|()| ()))
191//! ).map(|()| ()).take()
192//! ).try_map(
193//! |out: &str| i64::from_str_radix(&str::replace(&out, "_", ""), 16)
194//! ).parse_next(input)
195//! }
196//! ```
197//!
198//! See also [`hex_uint`].
199//!
200//! #### Octal
201//!
202//! ```rust
203//! use winnow::prelude::*;
204//! use winnow::{
205//! combinator::alt,
206//! combinator::repeat,
207//! combinator::{preceded, terminated},
208//! token::one_of,
209//! };
210//!
211//! fn octal<'s>(input: &mut &'s str) -> ModalResult<&'s str> {
212//! preceded(
213//! alt(("0o", "0O")),
214//! repeat(1..,
215//! terminated(one_of('0'..='7'), repeat(0.., '_').map(|()| ()))
216//! ).map(|()| ()).take()
217//! ).parse_next(input)
218//! }
219//! ```
220//!
221//! #### Binary
222//!
223//! ```rust
224//! use winnow::prelude::*;
225//! use winnow::{
226//! combinator::alt,
227//! combinator::repeat,
228//! combinator::{preceded, terminated},
229//! token::one_of,
230//! };
231//!
232//! fn binary<'s>(input: &mut &'s str) -> ModalResult<&'s str> {
233//! preceded(
234//! alt(("0b", "0B")),
235//! repeat(1..,
236//! terminated(one_of('0'..='1'), repeat(0.., '_').map(|()| ()))
237//! ).map(|()| ()).take()
238//! ).parse_next(input)
239//! }
240//! ```
241//!
242//! #### Decimal
243//!
244//! ```rust
245//! use winnow::prelude::*;
246//! use winnow::{
247//! combinator::repeat,
248//! combinator::terminated,
249//! token::one_of,
250//! };
251//!
252//! fn decimal<'s>(input: &mut &'s str) -> ModalResult<&'s str> {
253//! repeat(1..,
254//! terminated(one_of('0'..='9'), repeat(0.., '_').map(|()| ()))
255//! ).map(|()| ())
256//! .take()
257//! .parse_next(input)
258//! }
259//! ```
260//!
261//! See also [`dec_uint`] and [`dec_int`].
262//!
263//! ### Floating Point Numbers
264//!
265//! The following is adapted from [the Python parser by Valentin Lorentz](https://github.com/ProgVal/rust-python-parser/blob/master/src/numbers.rs).
266//!
267//! ```rust
268//! use winnow::prelude::*;
269//! use winnow::{
270//! combinator::alt,
271//! combinator::repeat,
272//! combinator::opt,
273//! combinator::{preceded, terminated},
274//! token::one_of,
275//! };
276//!
277//! fn float<'s>(input: &mut &'s str) -> ModalResult<&'s str> {
278//! alt((
279//! // Case one: .42
280//! (
281//! '.',
282//! decimal,
283//! opt((
284//! one_of(['e', 'E']),
285//! opt(one_of(['+', '-'])),
286//! decimal
287//! ))
288//! ).take()
289//! , // Case two: 42e42 and 42.42e42
290//! (
291//! decimal,
292//! opt(preceded(
293//! '.',
294//! decimal,
295//! )),
296//! one_of(['e', 'E']),
297//! opt(one_of(['+', '-'])),
298//! decimal
299//! ).take()
300//! , // Case three: 42. and 42.42
301//! (
302//! decimal,
303//! '.',
304//! opt(decimal)
305//! ).take()
306//! )).parse_next(input)
307//! }
308//!
309//! fn decimal<'s>(input: &mut &'s str) -> ModalResult<&'s str> {
310//! repeat(1..,
311//! terminated(one_of('0'..='9'), repeat(0.., '_').map(|()| ()))
312//! ).
313//! map(|()| ())
314//! .take()
315//! .parse_next(input)
316//! }
317//! ```
318//!
319//! See also [`float`].
320//!
321//! ## C-style Expressions
322//!
323//! An example using the [`expression()`] parser to build an abstract syntax tree
324//! for C-style expressions.
325//!
326//! The operator precedence level is based on the [C language][c-precedence].
327//!
328//! [c-precedence]: https://en.cppreference.com/w/c/language/operator_precedence.html
329//!
330//! ```rust
331#![doc = include_str!("../../examples/c_expression/parser.rs")]
332//! ```
333
334#![allow(unused_imports)]
335use crate::ascii::dec_int;
336use crate::ascii::dec_uint;
337use crate::ascii::escaped;
338use crate::ascii::float;
339use crate::ascii::hex_uint;
340use crate::ascii::take_escaped;
341use crate::combinator::expression;