dlexer/
lib.rs

1//! # DLexer: A Parser Combinator Library
2//!
3//! `dlexer` is a flexible and composable parser combinator library for Rust, inspired by
4//! libraries like Parsec in Haskell. It provides a rich set of tools for building
5//! robust parsers for both text-based and binary formats.
6//!
7//! ## Core Concepts
8//!
9//! The library is built around the [`parsec::Parsec`] type, which represents a parser.
10//! These parsers can be combined and transformed using a variety of functions and
11//! operators to build up complex parsing logic from simple, reusable components.
12//!
13//! ## Key Modules
14//!
15//! - [`parsec`]: The core parser combinator library. Contains the `Parsec` struct
16//!   and fundamental combinators like `map`, `bind`, `or`, `many`, etc.
17//! - [`lex`]: Provides tools for lexical analysis, including "skippers" for handling
18//!   whitespace and comments, and token-level parsers for common data types like
19//!   integers and symbols.
20//! - [`binary`]: A specialized module for parsing binary data, with parsers for
21//!   various integer and float types with controlled endianness.
22//! - [`errors`]: Defines the error handling system, including the `ParserError` trait.
23//!
24//! ## Getting Started
25//!
26//! Here is a simple example of parsing a comma-separated list of numbers:
27//!
28//! ```
29//! use dlexer::lex::{integer, WhitespaceSkipper};
30//! use dlexer::parsec::*;
31//!
32//! // A parser for a single decimal integer, handling surrounding whitespace.
33//! let number_parser = integer(10);
34//!
35//! // A parser for a list of numbers separated by commas.
36//! let list_parser = number_parser.sep(char(','));
37//!
38//! // Run the parser on some input.
39//! let result = list_parser.parse("1, 2, 3", WhitespaceSkipper);
40//!
41//! assert_eq!(result.unwrap(), vec![1, 2, 3]);
42//! ```
43//!
44//! ## Macros
45//!
46//! The library also provides helpful macros like [`do_parse!`] for monadic chaining
47//! and [`map!`] for mapping multiple parsers to values.
48pub mod binary;
49pub mod errors;
50pub mod lex;
51pub mod parsec;
52
53//mod stream;
54
55mod examples;
56
/// A convenience macro for mapping multiple parsers to specific values.
///
/// Each `parser => value` arm expands to `parser.map(|_| value)`, and the arms are
/// joined with the `|` operator, so the result is a single parser that tries each
/// provided parser in sequence and yields the value paired with the one that
/// succeeds. This is a concise alternative to chaining multiple `or` calls with
/// `map`.
///
/// The value expression lives inside the mapping closure, so it is evaluated when
/// the closure runs rather than when the parser is built.
///
/// # Syntax
///
/// `map!(parser1 => value1, parser2 => value2, ...)`
///
/// At least one arm is required. A trailing comma after the last arm is accepted,
/// which keeps one-arm-per-line layouts tidy.
///
/// # Example
///
/// ```
/// use dlexer::lex::symbol;
/// use dlexer::map;
/// use dlexer::parsec::*;
///
/// #[derive(Debug, PartialEq)]
/// enum Keyword {
///     Let,
///     If,
///     Else,
/// }
///
/// let keyword_parser = map!(
///     symbol("let") => Keyword::Let,
///     symbol("if") => Keyword::If,
///     symbol("else") => Keyword::Else,
/// );
///
/// assert_eq!(keyword_parser.test("if").unwrap(), Keyword::If);
/// assert!(keyword_parser.test("other").is_err());
/// ```
#[macro_export]
macro_rules! map {
    // `+` (not `*`) so an empty invocation is rejected at the macro boundary with a
    // "no rules expected" error instead of expanding to nothing; `$(,)?` permits a
    // trailing comma.
    ($($parser:expr => $value:expr),+ $(,)?) => {
        $(
            $parser.map(|_| $value)
        )|+
    };
}
98
/// A macro for writing parsers in a sequential, do-notation style.
///
/// This macro provides a more imperative-looking syntax for chaining parsers,
/// which can be more readable than deeply nested calls to `bind` and `then`.
///
/// # Syntax
///
/// - `let% <pattern> = <parser>;` : Binds the result of `<parser>` to `<pattern>`.
///   This expands to `<parser>.bind(move |<pattern>| ...)`, so any irrefutable
///   pattern that is valid as a closure parameter works: a plain name, a tuple
///   destructuring such as `(a, b)`, or `_`.
/// - `<parser>;` : Runs `<parser>` and discards its result. This is equivalent to `then`.
/// - `let <var> = <expr>;` or `let <var>: <type> = <expr>;` : Binds the result of a
///   standard Rust expression to `<var>`.
/// - The last expression in the block is the final parser, which determines the return value.
///
/// A trailing `;` after the final expression is not accepted.
///
/// # Example
///
/// ```
/// use dlexer::do_parse;
/// use dlexer::parsec::*;
///
/// // A parser for a pair of numbers in parentheses, like "(1, 2)".
/// let pair_parser: BasicParser = do_parse!(
///     char('(');
///     let% x = decimal_digit();
///     char(',');
///     let% y = decimal_digit();
///     char(')');
///     pure((x, y)) // The return value
/// );
///
/// let result = pair_parser.test("(3,4)").unwrap();
/// assert_eq!(result, ('3', '4'));
/// ```
#[macro_export]
macro_rules! do_parse {
    // Base case: a lone expression is the final parser. An `expr` fragment never
    // starts at the `let` keyword, so the `let` forms fall through to the rules
    // below instead of being swallowed here.
    ($e:expr) => {
        $e
    };

    // Monadic bind. Recursion goes through `$crate::` so the macro also works when
    // it is invoked by path without having been imported at the call site.
    (let% $v:pat = $m:expr; $($rest:tt)*) => {
        $m.bind(move |$v| $crate::do_parse!($($rest)*))
    };

    // Plain Rust binding, with an optional type annotation; the remaining steps
    // are expanded inside the same block so the binding stays in scope for them.
    (let $v:ident $(: $t:ty)? = $m:expr; $($rest:tt)*) => {
        { let $v $(: $t)? = $m; $crate::do_parse!($($rest)*) }
    };

    // Sequence: run the parser, drop its result, continue with the rest.
    ($m:expr; $($rest:tt)*) => {
        $m.then($crate::do_parse!($($rest)*))
    };
}
148
#[cfg(test)]
mod tests {
    // The demo AST below is only constructed and debug-printed, so its payloads
    // look unused to the compiler; presumably that is why dead-code lints are
    // switched off for this module.
    #![allow(dead_code)]

    use crate::binary::{n_bytes, u32, BasicByteParser};
    use crate::lex::{number, symbol, token, WhitespaceSkipper};
    use crate::parsec::*;

    /// Text parser flavour used by the string-based tests.
    type P = BasicParser;
    /// Byte parser flavour used by the binary test.
    type BP = BasicByteParser;

    /// Smoke test: parses a comma-separated mix of identifiers and booleans and
    /// prints the outcome. It never asserts, so it only fails on a panic.
    #[test]
    fn it_works() {
        #[derive(Debug, Clone)]
        enum AST {
            Identifier(String),
            Boolean(bool),
        }

        // Identifier, do-notation flavour: a letter or underscore followed by
        // any number of alphanumerics.
        let identifier: With<P, AST> = token(do_parse!(
            let% head = (alpha() | char('_'));
            let% tail = alphanumeric().many().collect::<String>();
            let name = format!("{}{}", head, tail);
            pure(AST::Identifier(name))
        ))
        .expected("identifier");

        // The same identifier grammar in applicative style; built only to show
        // the alternative spelling and never run.
        let _applicative_identifier: With<P, AST> = token(
            pure(AST::Identifier).apply(
                (alpha() | char('_'))
                    .extend(alphanumeric().many())
                    .collect::<String>(),
            ),
        );

        // Boolean literals mapped straight to AST nodes.
        let boolean = token(map!(
            symbol("true") => AST::Boolean(true),
            symbol("false") => AST::Boolean(false)
        ));

        let items = (boolean | identifier).sep_till(char(','), eof());
        let source = "foo, bar, a12, \ntrue, false";
        match items.test(source) {
            Err(err) => println!("{}", err),
            Ok(parsed) => {
                println!("Parsed successfully: {:?}", parsed);
            }
        }
    }

    /// Exercises `many1_till` under a whitespace skipper and prints the outcome.
    #[test]
    fn util_test() {
        let source = "fo o < bar";
        let up_to_angle: With<P, _> = token(any().many1_till(char('<')).collect::<String>());
        match up_to_angle.parse(source, WhitespaceSkipper) {
            Err(err) => println!("{}", err),
            Ok(parsed) => {
                println!("Parsed successfully: {:?}", parsed);
            }
        }
    }

    /// Parses a double-quoted string literal containing backslash escapes and
    /// prints the decoded text.
    #[test]
    fn escape_test() {
        // Two-character escape sequences and the text each one stands for.
        let escape_seq: With<P, _> = map!(
            symbol("\\n") => "\n",
            symbol("\\t") => "\t",
            symbol("\\r") => "\r",
            symbol("\\\\") => "\\",
            symbol("\\\"") => "\""
        );

        // Body pieces are either an escape or any character other than a quote;
        // the pieces are concatenated once the closing quote has been consumed.
        let quoted = token(
            (escape_seq.into() | any().not('\"').into())
                .many()
                .between(char('"'), char('"'))
                & |pieces: Vec<String>| pieces.join(""),
        );

        match quoted
            .test(r#""This is a string with an escape: \n and a quote: \" and a backslash: \\""#)
        {
            Err(err) => println!("{}", err),
            Ok(text) => {
                println!("Parsed successfully:\n {}", text);
            }
        }
    }

    /// Runs a bounded repetition (`take(1..2)`) against empty input with debug
    /// tracing enabled and prints whatever comes back.
    #[test]
    fn take_test() {
        let source = "";
        let bounded: With<P, _> = char('a').take(1..2).collect::<String>();
        match bounded.dbg().test(source) {
            Err(err) => println!("{}", err),
            Ok(parsed) => {
                println!("Parsed successfully: {:?}", parsed);
            }
        }
    }

    /// Reads a length-prefixed byte payload followed by end of input and prints
    /// the outcome.
    #[test]
    fn binary_test() {
        let framed: With<BP, _> = do_parse!(
            let% length = u32().dbg_();
            let% bytes = n_bytes(length as usize);
            eof();
            pure(bytes)
        );

        // Four prefix bytes followed by four payload bytes (0xAA..0xDD).
        // NOTE(review): the prefix decodes to 4 only if `u32()` reads
        // big-endian - confirm against the `binary` module.
        let frame = [0x00, 0x00, 0x00, 0x04, 0xAA, 0xBB, 0xCC, 0xDD];
        match framed.parse(&frame) {
            Err(err) => println!("{}", err),
            Ok(payload) => println!("Parsed successfully: {:?}", payload),
        }
    }

    /// Folds `1+2+3+4` with `chain1`; the only test here that asserts a value.
    #[test]
    fn chain_test() {
        let source = "1+2+3+4";
        let sum: With<P, _> = number().chain1(char('+').map(|_| |lhs, rhs| lhs + rhs));
        match sum.parse(source, WhitespaceSkipper) {
            Err(err) => {
                println!("{}", err);
                panic!("chain_test failed");
            }
            Ok(total) => {
                println!("Parsed successfully: {:?}", total);
                assert_eq!(total, 10.0);
            }
        }
    }
}
285        }
286    }
287}