dlexer/lib.rs
1//! # DLexer: A Parser Combinator Library
2//!
3//! `dlexer` is a flexible and composable parser combinator library for Rust, inspired by
4//! libraries like Parsec in Haskell. It provides a rich set of tools for building
5//! robust parsers for both text-based and binary formats.
6//!
7//! ## Core Concepts
8//!
9//! The library is built around the [`parsec::Parsec`] type, which represents a parser.
10//! These parsers can be combined and transformed using a variety of functions and
11//! operators to build up complex parsing logic from simple, reusable components.
12//!
13//! ## Key Modules
14//!
15//! - [`parsec`]: The core parser combinator library. Contains the `Parsec` struct
16//! and fundamental combinators like `map`, `bind`, `or`, `many`, etc.
17//! - [`lex`]: Provides tools for lexical analysis, including "skippers" for handling
18//! whitespace and comments, and token-level parsers for common data types like
19//! integers and symbols.
20//! - [`binary`]: A specialized module for parsing binary data, with parsers for
21//! various integer and float types with controlled endianness.
22//! - [`errors`]: Defines the error handling system, including the `ParserError` trait.
23//!
24//! ## Getting Started
25//!
26//! Here is a simple example of parsing a comma-separated list of numbers:
27//!
28//! ```
29//! use dlexer::lex::{integer, WhitespaceSkipper};
30//! use dlexer::parsec::*;
31//!
32//! // A parser for a single decimal integer, handling surrounding whitespace.
33//! let number_parser = integer(10);
34//!
35//! // A parser for a list of numbers separated by commas.
36//! let list_parser = number_parser.sep(char(','));
37//!
38//! // Run the parser on some input.
39//! let result = list_parser.parse("1, 2, 3", WhitespaceSkipper);
40//!
41//! assert_eq!(result.unwrap(), vec![1, 2, 3]);
42//! ```
43//!
44//! ## Macros
45//!
46//! The library also provides helpful macros like [`do_parse!`] for monadic chaining
47//! and [`map!`] for mapping multiple parsers to values.
48pub mod binary;
49pub mod errors;
50pub mod lex;
51pub mod parsec;
52
53//mod stream;
54
55mod examples;
56
/// A convenience macro for mapping multiple parsers to specific values.
///
/// This macro creates a new parser that tries each provided parser in sequence.
/// If a parser succeeds, it returns the corresponding value. This is a concise
/// alternative to chaining multiple `or` calls with `map`.
///
/// The expansion is `p1.map(|_| v1) | p2.map(|_| v2) | ...`: each parser's own
/// result is discarded and replaced by the paired value, and the alternatives
/// are joined with the `|` operator.
///
/// # Syntax
///
/// `map!(parser1 => value1, parser2 => value2, ...)`
///
/// At least one `parser => value` pair is required. A trailing comma after the
/// last pair is accepted, so multi-line invocations can be formatted like any
/// other Rust list.
///
/// # Example
///
/// ```
/// use dlexer::lex::symbol;
/// use dlexer::map;
/// use dlexer::parsec::*;
///
/// #[derive(Debug, PartialEq)]
/// enum Keyword {
///     Let,
///     If,
///     Else,
/// }
///
/// let keyword_parser = map!(
///     symbol("let") => Keyword::Let,
///     symbol("if") => Keyword::If,
///     symbol("else") => Keyword::Else
/// );
///
/// assert_eq!(keyword_parser.test("if").unwrap(), Keyword::If);
/// assert!(keyword_parser.test("other").is_err());
/// ```
#[macro_export]
macro_rules! map {
    // `+` (not `*`): an empty invocation has no meaningful expansion, so it is
    // rejected here with a clear "no rules expected" error instead of expanding
    // to nothing. `$(,)?` permits an optional trailing comma.
    ($($parser:expr => $value:expr),+ $(,)?) => {
        $(
            $parser.map(|_| $value)
        )|+
    };
}
98
/// A macro for writing parsers in a sequential, do-notation style.
///
/// This macro provides a more imperative-looking syntax for chaining parsers,
/// which can be more readable than deeply nested calls to `bind` and `then`.
///
/// # Syntax
///
/// - `let% <pattern> = <parser>;` : Binds the result of `<parser>` to `<pattern>`, which may be
///   a plain variable name or any irrefutable pattern such as `(a, b)` or `_`. This is
///   equivalent to `bind`; the continuation is a `move` closure.
/// - `<parser>;` : Runs `<parser>` and discards its result. This is equivalent to `then`.
/// - `let <var>: <type> = <expr>;` or `let <pattern> = <expr>;` : Binds the result of a standard
///   Rust expression. A type annotation is only supported when the left-hand side is a single
///   identifier.
/// - The last expression in the block is the final parser, which determines the return value.
///
/// Nested expansions are routed through `$crate::do_parse!`, so the macro can be invoked by
/// path (`dlexer::do_parse!(...)`) without importing it first.
///
/// # Example
///
/// ```
/// use dlexer::do_parse;
/// use dlexer::parsec::*;
///
/// // A parser for a pair of numbers in parentheses, like "(1, 2)".
/// let pair_parser: BasicParser = do_parse!(
///     char('(');
///     let% x = decimal_digit();
///     char(',');
///     let% y = decimal_digit();
///     char(')');
///     pure((x, y)) // The return value
/// );
///
/// let result = pair_parser.test("(3,4)").unwrap();
/// assert_eq!(result, ('3', '4'));
/// ```
#[macro_export]
macro_rules! do_parse {
    // Final expression: the parser that produces the block's value.
    ($e:expr) => {
        $e
    };

    // Monadic bind: feed the parser's result into the rest of the block.
    (let% $v:pat = $m:expr; $($rest:tt)*) => {
        $m.bind(move |$v| $crate::do_parse!($($rest)*))
    };

    // Plain Rust binding with an explicit type. This needs its own arm because a
    // `pat` fragment may not be followed by `:` in a macro matcher.
    (let $v:ident : $t:ty = $m:expr; $($rest:tt)*) => {
        { let $v: $t = $m; $crate::do_parse!($($rest)*) }
    };

    // Plain Rust binding without a type; accepts any pattern (`x`, `mut x`, `(a, b)`, ...).
    (let $v:pat = $m:expr; $($rest:tt)*) => {
        { let $v = $m; $crate::do_parse!($($rest)*) }
    };

    // Sequencing: run the parser, drop its result, continue with the rest.
    ($m:expr; $($rest:tt)*) => {
        $m.then($crate::do_parse!($($rest)*))
    };
}
148
#[cfg(test)]
mod tests {
    // Several tests build parsers or enum variants purely for demonstration,
    // so unused items are tolerated inside this module.
    #![allow(dead_code)]

    use crate::{
        binary::{n_bytes, u32, BasicByteParser},
        lex::{number, symbol, token, WhitespaceSkipper},
        parsec::*,
    };

    /// Parser flavour used by the text-based tests.
    type P = BasicParser;
    /// Parser flavour used by the byte-based test.
    type BP = BasicByteParser;

    /// Prints a parse outcome: the value through `Debug` on success, the error
    /// through `Display` on failure. It never panics, so tests that only call
    /// this are smoke tests rather than assertions.
    fn report<T, E>(outcome: ::std::result::Result<T, E>)
    where
        T: ::std::fmt::Debug,
        E: ::std::fmt::Display,
    {
        match outcome {
            Ok(value) => println!("Parsed successfully: {:?}", value),
            Err(error) => println!("{}", error),
        }
    }

    /// Builds identifier and boolean token parsers and runs them over a
    /// comma-separated list, printing whatever comes back.
    #[test]
    fn it_works() {
        #[derive(Debug, Clone)]
        enum AST {
            Identifier(String),
            Boolean(bool),
        }

        // Do-notation flavour: `alpha()` or `_` first, then a run of
        // `alphanumeric()` characters, glued together into one name.
        let identifier: With<P, AST> = token(do_parse!(
            let% head = (alpha() | char('_'));
            let% tail = alphanumeric().many().collect::<String>();
            let name = format!("{}{}", head, tail);
            pure(AST::Identifier(name))
        ))
        .expected("identifier");

        // The same grammar in applicative style; constructed only to show the
        // alternative spelling, never run.
        let _applicative_identifier: With<P, AST> = token(
            pure(AST::Identifier).apply(
                (alpha() | char('_'))
                    .extend(alphanumeric().many())
                    .collect::<String>(),
            ),
        );

        // Keywords mapped straight to AST values.
        let boolean = token(map!(
            symbol("true") => AST::Boolean(true),
            symbol("false") => AST::Boolean(false)
        ));

        let items = (boolean | identifier).sep_till(char(','), eof());
        report(items.test("foo, bar, a12, \ntrue, false"));
    }

    /// Exercises `many1_till` with a `<` terminator under `WhitespaceSkipper`.
    #[test]
    fn util_test() {
        let until_angle: With<P, _> = token(any().many1_till(char('<')).collect::<String>());
        report(until_angle.parse("fo o < bar", WhitespaceSkipper));
    }

    /// Parses a double-quoted string literal containing backslash escapes and
    /// prints the joined text.
    #[test]
    fn escape_test() {
        // Two-character escape sequences and the text each one stands for.
        let escapes: With<P, _> = map!(
            symbol("\\n") => "\n",
            symbol("\\t") => "\t",
            symbol("\\r") => "\r",
            symbol("\\\\") => "\\",
            symbol("\\\"") => "\""
        );

        // Either an escape or any character other than a quote, repeated between
        // a pair of quotes; the pieces are then joined into a single `String`.
        let string = token(
            (escapes.into() | any().not('\"').into())
                .many()
                .between(char('"'), char('"'))
                & |pieces: Vec<String>| pieces.join(""),
        );

        match string
            .test(r#""This is a string with an escape: \n and a quote: \" and a backslash: \\""#)
        {
            Ok(text) => println!("Parsed successfully:\n {}", text),
            Err(error) => println!("{}", error),
        }
    }

    /// Runs `take(1..2)` against empty input with debugging output enabled.
    #[test]
    fn take_test() {
        let some_a: With<P, _> = char('a').take(1..2).collect::<String>();
        report(some_a.dbg().test(""));
    }

    /// Reads a `u32` length field followed by that many bytes, then expects
    /// end of input.
    #[test]
    fn binary_test() {
        let framed: With<BP, _> = do_parse!(
            let% len = u32().dbg_();
            let% payload = n_bytes(len as usize);
            eof();
            pure(payload)
        );

        // Four length bytes (00 00 00 04) followed by the four payload bytes
        // AA BB CC DD. NOTE(review): this reads as length 4 only if `u32()`
        // is big-endian — confirm.
        let frame = [0x00, 0x00, 0x00, 0x04, 0xAA, 0xBB, 0xCC, 0xDD];
        report(framed.parse(&frame));
    }

    /// The only asserting test here: `1+2+3+4` folded through `chain1` must
    /// evaluate to `10.0`.
    #[test]
    fn chain_test() {
        let sum: With<P, _> = number().chain1(char('+').map(|_| |lhs, rhs| lhs + rhs));
        match sum.parse("1+2+3+4", WhitespaceSkipper) {
            Ok(total) => {
                println!("Parsed successfully: {:?}", total);
                assert_eq!(total, 10.0);
            }
            Err(error) => {
                println!("{}", error);
                panic!("chain_test failed");
            }
        }
    }
}