Skip to main content

lex_just_parse/
parser.rs

//! Combinator parsing utilities.
//!
//! Provides the core `Parser` type and related type definitions
//! for building lexer-driven parsers, together with the `try_parse!`
//! macro and the `many`, `many1` and `sep_by` combinators.
5
6use crate::lexer::Lexer;
7
/// A mutable reference to a `Lexer`, commonly used by parser functions to consume tokens.
///
/// The single lifetime `'lex` is used both for the borrow itself and for the
/// `Lexer`'s own lifetime parameter.
// NOTE(review): `&mut` is invariant in its referent, so tying both lifetimes
// together presumably keeps the lexer mutably borrowed for as long as the
// lexer's own lifetime parameter — confirm this is intended before relaxing
// the alias to two separate lifetimes.
pub type RefLexer<'lex> = &'lex mut Lexer<'lex>;
10
/// Represents a typical parser function signature that takes a lexer reference and returns a `Parser` result.
///
/// `T` is the parsed value type and `E` the error type carried by the returned `Parser`.
// NOTE(review): `'lex` is declared but never named on the right-hand side. The
// lifetimes of `RefLexer` and `Parser` are elided there, so the function pointer
// is generic over the lexer lifetime and the alias's `'lex` argument has no
// effect. Confirm whether `RefLexer<'lex>` / `Parser<'lex, T, E>` was intended.
pub type ParserFn<'lex, T, E> = fn(lex: RefLexer) -> Parser<T, E>;
13
/// Early-return helper for [`Parser`], playing the role `?` plays for `Result`.
///
/// Two forms are accepted:
///
/// * `try_parse!(expr)` evaluates to the tuple `(lexer, value)` when `expr`
///   is a `Parser::Success`.
/// * `try_parse!(lex, expr)` stores the returned lexer back into the mutable
///   binding `lex` and evaluates to the parsed value alone.
///
/// In both forms a `Parser::Fail` makes the *enclosing function* return that
/// failure, forwarding its lexer and error unchanged. The macro can therefore
/// only be used inside a function that returns a `Parser` with a matching
/// error type.
///
/// The expansion names `Parser` without a path, so `Parser` must be in scope
/// wherever the macro is invoked.
#[macro_export]
macro_rules! try_parse {
    ($parse:expr) => {
        match $parse {
            Parser::Fail(rest, err) => return Parser::Fail(rest, err),
            Parser::Success(rest, value) => (rest, value),
        }
    };
    ($lex:ident, $parse:expr) => {
        match $parse {
            // The function returns here, so the lexer is forwarded directly
            // instead of first being written back into `$lex`.
            Parser::Fail(rest, err) => return Parser::Fail(rest, err),
            Parser::Success(rest, value) => {
                $lex = rest;
                value
            }
        }
    };
}
36
37/// Represents the result of a parsing operation.
38///
39/// A parser either succeeds with an advanced lexer and a parsed value `T`, or fails
40/// and returns the unchanged lexer state along with an error `E`.
41pub enum Parser<'lex, T, E> {
42    Success(RefLexer<'lex>, T),
43    Fail(RefLexer<'lex>, E),
44}
45
46impl<'lex, T, E> Parser<'lex, T, E> {
47    /// Chains another parsing attempt if the current parser failed.
48    pub fn or_else<F>(self, f: F) -> Self
49    where
50        F: FnOnce(RefLexer) -> Parser<T, E>,
51    {
52        match self {
53            Parser::Success(..) => self,
54            Parser::Fail(lexer, ..) => f(lexer),
55        }
56    }
57
58    /// Chains a subsequent parser if the current parser succeeds.
59    pub fn and_then<U, F>(self, f: F) -> Parser<'lex, U, E>
60    where
61        F: FnOnce(RefLexer<'lex>, T) -> Parser<'lex, U, E>,
62    {
63        match self {
64            Parser::Success(lexer, e) => f(lexer, e),
65            Parser::Fail(lexer, e) => Parser::Fail(lexer, e),
66        }
67    }
68
69    /// Converts this parser result into a standard `Result`,
70    /// returning the parsed item or the lexer and error pair upon failure.
71    pub fn success(self) -> Result<T, (RefLexer<'lex>, E)> {
72        match self {
73            Parser::Success(_, e) => Ok(e),
74            Parser::Fail(lex, e) => Err((lex, e)),
75        }
76    }
77}
78
79/// Parses zero or more occurrences of `parser` until it fails.
80/// Returns the collected items and the updated lexer.
81pub fn many<'lex, T, E, F>(mut lex: RefLexer<'lex>, parser: F) -> Parser<'lex, Vec<T>, E>
82where
83    F: Fn(RefLexer<'lex>) -> Parser<'lex, T, E>,
84{
85    let mut results = Vec::new();
86    loop {
87        match parser(lex) {
88            Parser::Success(next_lex, val) => {
89                results.push(val);
90                lex = next_lex;
91            }
92            Parser::Fail(next_lex, _) => {
93                return Parser::Success(next_lex, results);
94            }
95        }
96    }
97}
98
99/// Parses one or more occurrences of `parser`.
100/// Returns Fail if the first attempt fails.
101pub fn many1<'lex, T, E, F>(lex: RefLexer<'lex>, parser: F) -> Parser<'lex, Vec<T>, E>
102where
103    F: Fn(RefLexer<'lex>) -> Parser<'lex, T, E>,
104{
105    match parser(lex) {
106        Parser::Success(lex, first_val) => {
107            let mut results = vec![first_val];
108            let mut current_lex = lex;
109            loop {
110                match parser(current_lex) {
111                    Parser::Success(next_lex, val) => {
112                        results.push(val);
113                        current_lex = next_lex;
114                    }
115                    Parser::Fail(next_lex, _) => {
116                        return Parser::Success(next_lex, results);
117                    }
118                }
119            }
120        }
121        Parser::Fail(lex, err) => Parser::Fail(lex, err),
122    }
123}
124
125/// Parses zero or more occurrences of `parser` separated by `separator`.
126pub fn sep_by<'lex, T, S, E, F, G>(
127    lex: RefLexer<'lex>,
128    parser: F,
129    separator: G,
130) -> Parser<'lex, Vec<T>, E>
131where
132    F: Fn(RefLexer<'lex>) -> Parser<'lex, T, E>,
133    G: Fn(RefLexer<'lex>) -> Parser<'lex, S, E>,
134{
135    match parser(lex) {
136        Parser::Success(lex, first_val) => {
137            let mut results = vec![first_val];
138            let mut current_lex = lex;
139            loop {
140                match separator(current_lex) {
141                    Parser::Success(sep_lex, _) => match parser(sep_lex) {
142                        Parser::Success(next_lex, val) => {
143                            results.push(val);
144                            current_lex = next_lex;
145                        }
146                        Parser::Fail(fail_lex, _) => {
147                            return Parser::Success(fail_lex, results);
148                        }
149                    },
150                    Parser::Fail(next_lex, _) => {
151                        return Parser::Success(next_lex, results);
152                    }
153                }
154            }
155        }
156        Parser::Fail(lex, _) => Parser::Success(lex, Vec::new()),
157    }
158}
159
// Unit tests for `Parser`, the `try_parse!` macro and the `many`, `many1`
// and `sep_by` combinators. Each scenario builds its own `Lexer`, because a
// `RefLexer` is passed by value into the parser under test.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lexer::{Lexer, TokenKind};

    // Test helper: succeeds when the next token is an identifier whose source
    // text equals `expected`, calling `lex.next()` to consume it and yielding
    // the text as a `String`. Otherwise it fails without calling `next`, with
    // a message naming the expected identifier and the token kind found.
    fn parse_ident<'lex>(lex: RefLexer<'lex>, expected: &str) -> Parser<'lex, String, String> {
        // Clone the peeked token so it can still be inspected after `lex` is used again.
        let tok = lex.peek().clone();
        if tok.kind == TokenKind::Identifier && tok.source() == expected {
            lex.next(); // consume the matched identifier
            Parser::Success(lex, tok.source.to_string())
        } else {
            Parser::Fail(
                lex,
                format!("Expected identifier '{}', got {:?}", expected, tok.kind),
            )
        }
    }

    // `parse_ident` yields `Success` for a matching identifier and `Fail`
    // (with the expected-identifier message) for a different one.
    #[test]
    fn test_parser_success_and_fail_variants() {
        let mut lexer = Lexer::new("abc");
        let result_success = parse_ident(&mut lexer, "abc");
        match result_success {
            Parser::Success(_, val) => assert_eq!(val, "abc"),
            _ => panic!("Expected Success"),
        }

        let mut lexer = Lexer::new("xyz");
        let result_fail = parse_ident(&mut lexer, "abc");
        match result_fail {
            Parser::Fail(_, err) => assert!(err.contains("Expected identifier 'abc'")),
            _ => panic!("Expected Fail"),
        }
    }

    // `Parser::success` maps `Success` to `Ok(value)` and `Fail` to
    // `Err((lexer, error))`.
    #[test]
    fn test_parser_success_method() {
        let mut lexer = Lexer::new("abc");
        let result = parse_ident(&mut lexer, "abc").success();
        assert_eq!(result.ok().unwrap(), "abc");

        let mut lexer2 = Lexer::new("xyz");
        let result2 = parse_ident(&mut lexer2, "abc").success();
        assert!(result2.is_err());
        let (remaining_lexer, err) = result2.err().unwrap();
        // The failed attempt must not have consumed the "xyz" token.
        assert_eq!(remaining_lexer.next().source(), "xyz");
        assert!(err.contains("Expected identifier 'abc'"));
    }

    // `or_else` runs the fallback only when the first parser failed.
    #[test]
    fn test_parser_or_else() {
        // Test standard or_else functionality where first parser fails and second succeeds
        let mut lexer = Lexer::new("xyz");
        let result = parse_ident(&mut lexer, "abc").or_else(|lex| parse_ident(lex, "xyz"));

        match result {
            Parser::Success(_, val) => assert_eq!(val, "xyz"),
            _ => panic!("Expected success after or_else fallback"),
        }

        // Test or_else where first parser succeeds (second should not run)
        let mut lexer = Lexer::new("abc");
        let result = parse_ident(&mut lexer, "abc")
            .or_else(|_lex| panic!("Should not execute or_else fallback when first succeeded"));
        match result {
            Parser::Success(_, val) => assert_eq!(val, "abc"),
            _ => panic!("Expected success"),
        }
    }

    // `and_then` chains on success, propagates the second parser's failure,
    // and skips the continuation when the first parser failed.
    #[test]
    fn test_parser_and_then() {
        // Test chaining with and_then
        // We want to parse "abc" then "xyz"
        let mut lexer = Lexer::new("abc xyz");
        let result = parse_ident(&mut lexer, "abc").and_then(|lex, first_val| {
            parse_ident(lex, "xyz")
                .and_then(|lex, second_val| Parser::Success(lex, (first_val, second_val)))
        });

        match result {
            Parser::Success(_, (v1, v2)) => {
                assert_eq!(v1, "abc");
                assert_eq!(v2, "xyz");
            }
            _ => panic!("Expected success for chained and_then"),
        }

        // Test and_then failure propagation
        let mut lexer2 = Lexer::new("abc error");
        let result2 =
            parse_ident(&mut lexer2, "abc").and_then(|lex, _first_val| parse_ident(lex, "xyz"));
        match result2 {
            Parser::Fail(_, err) => assert!(err.contains("Expected identifier 'xyz'")),
            _ => panic!("Expected Fail"),
        }

        // Test and_then when first parser fails (second should not run)
        let mut lexer3 = Lexer::new("error xyz");
        let result3 = parse_ident(&mut lexer3, "abc").and_then(
            |_lex, _first_val| -> Parser<String, String> {
                panic!("Should not execute and_then function when first failed")
            },
        );
        match result3 {
            Parser::Fail(_, err) => assert!(err.contains("Expected identifier 'abc'")),
            _ => panic!("Expected Fail"),
        }
    }

    // Single-argument `try_parse!`: yields `(lexer, value)` on success and
    // returns the failure from the enclosing function otherwise.
    #[test]
    fn test_try_parse_macro() {
        // Define a parser function that parses "abc xyz" using try_parse!
        fn parse_pair<'lex>(lex: RefLexer<'lex>) -> Parser<'lex, (String, String), String> {
            let (lex, first) = try_parse!(parse_ident(lex, "abc"));
            let (lex, second) = try_parse!(parse_ident(lex, "xyz"));
            Parser::Success(lex, (first, second))
        }

        let mut lexer = Lexer::new("abc xyz");
        let res = parse_pair(&mut lexer);
        match res {
            Parser::Success(_, (v1, v2)) => {
                assert_eq!(v1, "abc");
                assert_eq!(v2, "xyz");
            }
            _ => panic!("Expected success using try_parse!"),
        }

        // The second identifier does not match, so the macro returns early with its error.
        let mut lexer2 = Lexer::new("abc err");
        let res2 = parse_pair(&mut lexer2);
        match res2 {
            Parser::Fail(_, err) => assert!(err.contains("Expected identifier 'xyz'")),
            _ => panic!("Expected Fail using try_parse!"),
        }
    }

    // Two-argument `try_parse!`: writes the returned lexer back into the
    // named binding and yields only the parsed value.
    #[test]
    fn test_try_parse_macro_in_place() {
        fn parse_pair_in_place<'lex>(
            mut lex: RefLexer<'lex>,
        ) -> Parser<'lex, (String, String), String> {
            let first = try_parse!(lex, parse_ident(lex, "abc"));
            let second = try_parse!(lex, parse_ident(lex, "xyz"));
            Parser::Success(lex, (first, second))
        }

        let mut lexer = Lexer::new("abc xyz");
        let res = parse_pair_in_place(&mut lexer);
        match res {
            Parser::Success(_, (v1, v2)) => {
                assert_eq!(v1, "abc");
                assert_eq!(v2, "xyz");
            }
            _ => panic!("Expected success using try_parse! in-place"),
        }

        let mut lexer2 = Lexer::new("abc err");
        let res2 = parse_pair_in_place(&mut lexer2);
        match res2 {
            Parser::Fail(_, err) => assert!(err.contains("Expected identifier 'xyz'")),
            _ => panic!("Expected Fail using try_parse! in-place"),
        }
    }

    // `many` collects every leading match and succeeds with an empty list
    // when nothing matches; the first non-matching token stays unconsumed.
    #[test]
    fn test_combinator_many() {
        let mut lexer = Lexer::new("abc abc abc xyz");
        let result = many(&mut lexer, |l| parse_ident(l, "abc"));
        match result {
            Parser::Success(remaining_lexer, items) => {
                assert_eq!(items, vec!["abc", "abc", "abc"]);
                assert_eq!(remaining_lexer.next().source(), "xyz");
            }
            _ => panic!("Expected Success for many"),
        }

        let mut lexer2 = Lexer::new("xyz");
        let result2 = many(&mut lexer2, |l| parse_ident(l, "abc"));
        match result2 {
            Parser::Success(remaining_lexer, items) => {
                assert!(items.is_empty());
                assert_eq!(remaining_lexer.next().source(), "xyz");
            }
            _ => panic!("Expected Success for many with empty results"),
        }
    }

    // `many1` behaves like `many` once the first match succeeds, but fails
    // (forwarding the parser's error) when the very first attempt fails.
    #[test]
    fn test_combinator_many1() {
        let mut lexer = Lexer::new("abc abc xyz");
        let result = many1(&mut lexer, |l| parse_ident(l, "abc"));
        match result {
            Parser::Success(remaining_lexer, items) => {
                assert_eq!(items, vec!["abc", "abc"]);
                assert_eq!(remaining_lexer.next().source(), "xyz");
            }
            _ => panic!("Expected Success for many1"),
        }

        let mut lexer2 = Lexer::new("xyz");
        let result2 = many1(&mut lexer2, |l| parse_ident(l, "abc"));
        match result2 {
            Parser::Fail(remaining_lexer, err) => {
                assert!(err.contains("Expected identifier 'abc'"));
                assert_eq!(remaining_lexer.next().source(), "xyz");
            }
            _ => panic!("Expected Fail for many1 on immediately failing parser"),
        }
    }

    // `sep_by` collects comma-separated identifiers and succeeds with an
    // empty list when the first item does not match.
    #[test]
    fn test_combinator_sep_by() {
        // Separator parser: consumes a single comma token, otherwise fails
        // without calling `next`.
        fn parse_comma<'lex>(lex: RefLexer<'lex>) -> Parser<'lex, (), String> {
            let tok = lex.peek().clone();
            if tok.kind == TokenKind::Comma {
                lex.next();
                Parser::Success(lex, ())
            } else {
                Parser::Fail(lex, "Expected comma".to_string())
            }
        }

        let mut lexer = Lexer::new("abc , abc , abc ;");
        let result = sep_by(&mut lexer, |l| parse_ident(l, "abc"), parse_comma);
        match result {
            Parser::Success(remaining_lexer, items) => {
                assert_eq!(items, vec!["abc", "abc", "abc"]);
                // The list ends at the semicolon, which must still be unconsumed.
                assert_eq!(remaining_lexer.next().kind, TokenKind::SemiColon);
            }
            _ => panic!("Expected Success for sep_by"),
        }

        let mut lexer2 = Lexer::new("xyz");
        let result2 = sep_by(&mut lexer2, |l| parse_ident(l, "abc"), parse_comma);
        match result2 {
            Parser::Success(remaining_lexer, items) => {
                assert!(items.is_empty());
                assert_eq!(remaining_lexer.next().source(), "xyz");
            }
            _ => panic!("Expected Success for sep_by on empty sequence"),
        }
    }
}