aoc_parse/
macros.rs

1//! Implementation of the `parser!` macro.
2//!
3//! # Syntax errors
4//!
5//! The doc-tests below address cases where the pattern is invalid and
6//! compilation should fail.
7//!
8//! ```compile_fail
9//! # use aoc_parse::parser;
10//! let p = parser!(label:);
11//! //      ^ERROR: missing pattern after `label:`
12//! ```
13//!
14//! ```compile_fail
15//! # use aoc_parse::parser;
16//! let p = parser!(nothing: => nothing);
17//! //      ^ERROR: missing pattern after `nothing:`
18//! ```
19//!
20//! ```compile_fail
21//! # use aoc_parse::{parser, prelude::*};
22//! let p = parser!("double label " a:b:u32);
23//! //      ^ERROR: missing pattern between `a:` and `b:`
24//! ```
25//!
26//! ```compile_fail
27//! # use aoc_parse::{parser, prelude::*};
28//! let p = parser!(alpha*?);
29//! //      ^ERROR: non-greedy quantifier `*?` is not supported
30//! ```
31//!
32//! ```compile_fail
33//! # use aoc_parse::{parser, prelude::*};
34//! let p = parser!(? "hello world");
35//! //      ^ERROR: quantifier `?` has to come after something
36//! ```
37//!
38//! ```compile_fail
39//! # use aoc_parse::{parser, prelude::*};
40//! let p = parser!(rule value: i64 = i64;);
41//! //      ^ERROR: missing final pattern
42//! ```
43//!
44//! ```compile_fail
45//! # use aoc_parse::{parser, prelude::*};
46//! let p = parser! {
47//!     rule expr = {
48//!         t:term => t,
49//!         l:expr "+" r:term => l + r,
50//!     }
51//!     rule term = x:i64 => x;
52//!     // ^ERROR: missing semicolon before: rule term = x...
53//!     expr
54//! };
55//! ```
56//!
57//! This one is not something we can detect at macro-expand time,
58//! but the ambiguity in `ident ( )` between function call and concatenation
59//! is always resolved in favor of a function call, regardless of whether `ident`
60//! actually names a function. We let Rust typeck flag the error if the user
61//! intended concatenation.
62//!
63//! ```compile_fail
64//! # use aoc_parse::{parser, prelude::*};
65//! const PROMPT: &str = "> ";
66//! let p = parser!(PROMPT (alpha '-')*);
67//! //      ^ERROR: call expression requires function
68//! ```
69
70pub use crate::parsers::{
71    alt, empty, lines, map, opt, pair, plus, sequence, single_value, star, RuleParser,
72    RuleSetBuilder,
73};
74
/// Builds a parser from a pattern written in the `aoc_parse` pattern language.
///
/// See [the top-level documentation][lib] for a guided introduction to
/// patterns. All tokens passed to this macro are forwarded, unchanged, to the
/// hidden `aoc_parse_helper!` macro, which does the actual translation.
///
/// The formal syntax accepted is:
///
/// ```text
/// pattern ::= rule* expr          -- optional rule set, then the final pattern
///
/// rule ::= "rule" ident ":" rust_type "=" expr ";"
///
/// expr ::= seq
///   | seq "=>" rust_expr      -- custom conversion
///
/// seq ::= lterm
///   | seq lterm               -- concatenated subpatterns
///
/// lterm ::= term
///   | ident ":" term          -- labeled subpattern
///
/// term ::= prim
///   | term "*"                -- optional repeating
///   | term "+"                -- repeating
///   | term "?"                -- optional
///
/// prim ::= "(" expr ")"
///   | ident "(" expr,* ")"    -- function call
///   | ident                   -- named parser (when not followed by `(`)
///   | literal                 -- exact char or string
///   | "{" expr,* "}"          -- one-of syntax
///
/// ident ::= a Rust identifier
/// rust_expr ::= a Rust expression
/// rust_type ::= a Rust type
/// literal ::= a Rust literal
/// ```
///
/// [lib]: crate#patterns
#[macro_export]
macro_rules! parser {
    ( $( $tokens:tt )* ) => {
        $crate::aoc_parse_helper!( $( $tokens )* )
    };
}
114
/// Implementation detail of `parser!`; not part of the public API.
///
/// This is a token-muncher built from several "sub-macros", each selected by
/// a leading `@name` marker:
///
/// * `@split_rules` - splits the input into `rule ... ;` items and a final
///   pattern, then hands off to `@rules`.
/// * `@rules` - emits the `RuleSetBuilder` code (or just the pattern when
///   there are no rules).
/// * `@seq` - translates one `expr` of the pattern grammar.
/// * `@prim` - translates a single primitive term.
/// * `@args` - translates the argument list of a function call.
/// * `@list` - translates the arms of a `{ a, b, c }` alternation.
/// * `@reverse`, `@reverse_map`, `@reverse_pats` - fold the reversed stacks
///   built by `@seq` into a single parser expression or Rust pattern.
///
/// Input without a leading `@` is the public entry point: it is wrapped in
/// `single_value(..)` and passed to `@split_rules`.
///
/// All paths into the standard library are written `::core::...` so that the
/// expansion cannot be hijacked by an item named `core` at the call site.
#[macro_export]
#[doc(hidden)]
macro_rules! aoc_parse_helper {
    // aoc_parse_helper!(@seq [expr] [stack] [patterns])
    //
    // Submacro to transform a pattern matching `expr` to a Rust Parser
    // expression.
    //
    // Gradually parses the tokens in `expr`, producing `stack` (in reverse)
    // and `patterns` (in reverse for no good reason), then at the end converts
    // those output-stacks into a Rust parser-expression using `@reverse` (or,
    // when a `=> mapper` is present, `@reverse_map` and `@reverse_pats`).
    //
    // `stack` is a list of Rust expressions, parsers for the elements of the
    // `expr`. `patterns` is a list of patterns that match the output of the
    // overall SequenceParser we will build from the bits in `stack`.
    //
    // BUG: Because of the simplistic way this macro-parses the input, it
    // doesn't reject some bad syntax like `foo?(x)` or `foo??` or `foo++`.

    // Mapper at the end of a pattern, `expr ::= seq => rust_expr`. The labels
    // collected in `patterns` become the closure's argument pattern.
    (@seq [ => $mapper:expr ] [ $($stack:tt)* ] [ $($pats:tt ,)* ]) => {
        $crate::macros::map(
            $crate::aoc_parse_helper!(@reverse_map [ $($stack)* ] []),
            | ( $crate::aoc_parse_helper!(@reverse_pats [ $($pats ,)* ] []) ) | $mapper ,
        )
    };

    // Reject unsupported non-greedy regex syntax.
    (@seq [ * ? $($tail:tt)* ] [ $($stack:expr ,)* ] [ $($pats:tt ,)* ]) => {
        ::core::compile_error!("non-greedy quantifier `*?` is not supported")
    };

    // Reject unsupported non-greedy regex syntax.
    (@seq [ + ? $($tail:tt)* ] [ $($stack:expr ,)* ] [ $($pats:tt ,)* ]) => {
        ::core::compile_error!("non-greedy quantifier `+?` is not supported")
    };

    // Detect Kleene * and apply it to the preceding term.
    (@seq [ * $($tail:tt)* ] [ $top:expr , $($stack:expr ,)* ] [ $($pats:tt ,)* ]) => {
        $crate::aoc_parse_helper!(@seq [ $($tail)* ] [ $crate::macros::star($top) , $($stack ,)* ] [ $($pats ,)* ])
    };

    // Detect Kleene + and apply it to the preceding term.
    (@seq [ + $($tail:tt)* ] [ $top:expr , $($stack:expr ,)* ] [ $($pats:tt ,)* ]) => {
        $crate::aoc_parse_helper!(@seq [ $($tail)* ] [ $crate::macros::plus($top) , $($stack ,)* ] [ $($pats ,)* ])
    };

    // Detect optional `?` and apply it to the preceding term.
    (@seq [ ? $($tail:tt)* ] [ $top:expr , $($stack:tt)* ] [ $($pats:tt ,)* ]) => {
        $crate::aoc_parse_helper!(@seq [ $($tail)* ] [ $crate::macros::opt($top) , $($stack)* ] [ $($pats ,)* ])
    };

    // A quantifier at the beginning of input (nothing on the stack) is an error.
    (@seq [ * $($tail:tt)* ] [ ] [ $($pats:tt ,)* ]) => {
        ::core::compile_error!("quantifier `*` has to come after something, not at the start of an expression.")
    };
    (@seq [ + $($tail:tt)* ] [ ] [ $($pats:tt ,)* ]) => {
        ::core::compile_error!("quantifier `+` has to come after something, not at the start of an expression.")
    };
    (@seq [ ? $($tail:tt)* ] [ ] [ $($pats:tt ,)* ]) => {
        ::core::compile_error!("quantifier `?` has to come after something, not at the start of an expression.")
    };

    // Reject incorrect label syntax. (No trailing `;` after `compile_error!`:
    // these arms expand in expression position.)
    (@seq [ $label:ident : => $($tail:tt)* ] [ $($stack:expr ,)* ] [ $($pats:tt ,)* ]) => {
        ::core::compile_error!(
            ::core::concat!("missing pattern after `", ::core::stringify!($label), ":`")
        )
    };
    (@seq [ $label:ident : ] [ $($stack:expr ,)* ] [ $($pats:tt ,)* ]) => {
        ::core::compile_error!(
            ::core::concat!("missing pattern after `", ::core::stringify!($label), ":`")
        )
    };
    (@seq [ $label1:ident : $label2:ident : $( $tail:tt )* ] [ $($stack:expr ,)* ] [ $($pats:tt ,)* ]) => {
        ::core::compile_error!(
            ::core::concat!(
                "missing pattern between `", ::core::stringify!($label1), ":` and `",
                    ::core::stringify!($label2), ":`"
            )
        )
    };

    // Function call. `ident ( ... )` is always treated as a call, never as
    // the concatenation of `ident` and a parenthesized group.
    (@seq [ $f:ident ( $($args:tt)* ) $($tail:tt)* ] [ $($stack:expr ,)* ] [ $($pats:tt ,)* ]) => {
        $crate::aoc_parse_helper!(
            @seq
            [ $($tail)* ]
            [
                $crate::aoc_parse_helper!(@args ( $f ) [ $( $args )* ] [] ())
                ,
                $($stack ,)*
            ]
            [ _ , $($pats ,)* ]
        )
    };

    // Labelled function call
    (@seq [ $label:ident : $f:ident ( $( $args:tt )* )  $( $tail:tt )* ] [ $($stack:expr ,)* ] [ $($pats:tt ,)* ]) => {
        $crate::aoc_parse_helper!(
            @seq
            [ $($tail)* ]
            [
                $crate::aoc_parse_helper!(@args ( $f ) [ $( $args )* ] [] ())
                ,
                $($stack ,)*
            ]
            [ $label , $($pats ,)* ]
        )
    };

    // any Rust literal (strings and chars are valid patterns; others may be
    // used as function arguments)
    (@seq [ $x:literal $($tail:tt)* ] [ $($stack:expr ,)* ] [ $($pats:tt ,)* ]) => {
        $crate::aoc_parse_helper!(
            @seq
            [ $($tail)* ]
            [
                $crate::aoc_parse_helper!(@prim $x) ,
                $($stack ,)*
            ]
            [ _, $($pats ,)* ]
        )
    };

    // Other labeled term
    (@seq [ $label:ident : $x:tt $($tail:tt)* ] [ $($stack:expr ,)* ] [ $($pats:tt ,)* ]) => {
        $crate::aoc_parse_helper!(
            @seq
            [ $($tail)* ]
            [
                $crate::aoc_parse_helper!(@prim $x) ,
                $($stack ,)*
            ]
            [ $label , $($pats ,)* ]
        )
    };

    // the first `tt` of any other `term`
    (@seq [ $x:tt $($tail:tt)* ] [ $($stack:expr ,)* ] [ $($pats:tt ,)* ]) => {
        $crate::aoc_parse_helper!(
            @seq
            [ $($tail)* ]
            [
                $crate::aoc_parse_helper!(@prim $x) ,
                $($stack ,)*
            ]
            [ _ , $($pats ,)* ]
        )
    };

    // end of input (no mapper): the collected patterns are not needed
    (@seq [ ] [ $($parts:expr ,)* ] [ $($pats:tt ,)* ]) => {
        $crate::aoc_parse_helper!(@reverse [ $($parts ,)* ] [])
    };

    // anything not matched by this point is an error
    (@seq [ $($tail:tt)* ] [ $($parts:expr ,)* ] [ $($pats:tt ,)* ]) => {
        ::core::compile_error!(::core::stringify!(unrecognized syntax @ $($tail)*))
    };

    // aoc_parse_helper!(@reverse [input expr stack] [output stack])
    //
    // Take the stack of parsers and produce a single sequence-parser. For
    // example, if the input is [d, c, b, a] this produces the output
    // `sequence(a, sequence(b, sequence(c, d)))`.
    (@reverse [ ] [ ]) => {
        $crate::macros::empty()
    };
    (@reverse [ ] [ $out:expr ]) => {
        $out
    };
    (@reverse [ $head:expr , $($tail:expr ,)* ] [ ]) => {
        $crate::aoc_parse_helper!(@reverse [ $($tail ,)* ] [ $head ])
    };
    (@reverse [ $head:expr , $($tail:expr ,)* ] [ $out:expr ]) => {
        $crate::aoc_parse_helper!(@reverse [ $($tail ,)* ] [ $crate::macros::sequence($head, $out) ])
    };

    // aoc_parse_helper!(@reverse_map [input expr stack] [output expr])
    //
    // Take the stack of parsers and make a single parser that produces nested
    // pairs. For example, if the input is [d, c, b, a] this produces the
    // output `pair(a, pair(b, pair(c, d)))`.
    (@reverse_map [] []) => {
        $crate::macros::empty()
    };
    (@reverse_map [] [ $out:expr ]) => {
        $out
    };
    (@reverse_map [ $head:expr , $( $tail:expr , )* ] []) => {
        $crate::aoc_parse_helper!(@reverse_map [ $( $tail , )* ] [ $head ])
    };
    (@reverse_map [ $head:expr , $( $tail:expr , )* ] [ $out:expr ]) => {
        $crate::aoc_parse_helper!(
            @reverse_map
                [ $( $tail , )* ]
                [ $crate::macros::pair($head, $out) ]
        )
    };

    // aoc_parse_helper!(@reverse_pats [pattern stack] [output stack])
    //
    // Take the stack of Rust patterns and produce a single pattern, nested
    // the same way `@reverse_map` nests parsers: [d, c, b, a] becomes
    // `(a, (b, (c, d)))`.
    (@reverse_pats [] []) => {
        ()
    };
    (@reverse_pats [] [ $out:pat ]) => {
        $out
    };
    (@reverse_pats [ $head:pat , $( $tail:pat , )* ] []) => {
        $crate::aoc_parse_helper!(@reverse_pats [ $( $tail , )* ] [ $head ])
    };
    (@reverse_pats [ $head:pat , $( $tail:pat , )* ] [ $out:pat ]) => {
        $crate::aoc_parse_helper!(@reverse_pats [ $( $tail , )* ] [ ($head, $out) ])
    };

    // aoc_parse_helper!(@prim pattern)
    //
    // Transform a `prim` into a Rust Parser expression.
    (@prim $x:ident) => {
        $x
    };
    (@prim $x:literal) => {
        $x
    };
    (@prim ( $($nested:tt)* )) => {
        $crate::macros::single_value(
            $crate::aoc_parse_helper!(@seq [ $( $nested )* ] [ ] [ ])
        )
    };
    (@prim { $($nested:tt)* }) => {
        $crate::aoc_parse_helper!(@list [ $( $nested )* ] [ ] [ ])
    };

    // aoc_parse_helper!(@args fn_expr [unexamined input tokens] [current argument holding area] [transformed output argument exprs])
    //
    // Transform argument lists. Each comma-separated argument is itself a
    // pattern and is translated with `@seq`.

    // end of an argument in an argument list
    (@args ( $f:expr ) [ , $($tail:tt)* ] [ $($seq:tt)* ] ( $( $arg:expr , )* )) => {
        $crate::aoc_parse_helper!(
            @args
            ( $f )
            [ $( $tail )* ]
            [ ]
            (
                $( $arg , )*
                $crate::aoc_parse_helper!(@seq [ $( $seq )* ] [ ] [ ]) ,
            )
        )
    };

    // not the end of an arg; just move a token from the input to the holding area
    (@args ( $f:expr ) [ $next:tt $($tail:tt)* ] [ $($seq:tt)* ] ( $( $out:expr , )* )) => {
        $crate::aoc_parse_helper!(
            @args
            ( $f )
            [ $( $tail )* ]
            [ $( $seq )* $next ]
            ( $( $out , )* )
        )
    };

    // end of argument list, after trailing comma or empty
    (@args ( $f:expr ) [] [] ( $( $out:expr , )* )) => {
        $f ( $( $out , )* )
    };

    // end of argument list with no trailing comma: infer one
    (@args ( $f:expr ) [] [ $($seq:tt)+ ] ( $( $out:expr , )* )) => {
        $crate::aoc_parse_helper!(@args ( $f ) [,] [ $($seq)+ ] ( $( $out , )* ))
    };

    // aoc_parse_helper!(@list [unexamined input tokens] [current arm holding area] [transformed output arm parser expressions])
    //
    // The list of patterns in the body of an alternation.

    // end of first arm of an alternation
    (@list [ , $($tail:tt)* ] [ $($seq:tt)* ] [ ]) => {
        $crate::aoc_parse_helper!(
            @list
            [ $( $tail )* ]
            [ ]
            [ $crate::aoc_parse_helper!(@seq [ $( $seq )* ] [ ] [ ]) ]
        )
    };

    // end of a non-first arm of an alternation
    (@list [ , $($tail:tt)* ] [ $($seq:tt)* ] [ $out:expr ]) => {
        $crate::aoc_parse_helper!(
            @list
            [ $( $tail )* ]
            [ ]
            [ $crate::macros::alt($out, $crate::aoc_parse_helper!(@seq [ $( $seq )* ] [ ] [ ])) ]
        )
    };

    // not the end of an arm; just move a token from the input to the holding area
    (@list [ $next:tt $($tail:tt)* ] [ $($seq:tt)* ] [ $($out:expr)? ]) => {
        $crate::aoc_parse_helper!(
            @list
            [ $( $tail )* ]
            [ $( $seq )* $next ]
            [ $( $out )? ]
        )
    };

    // completely empty alternation; could technically be understood as never matching,
    // but it's not a useful thing to express, so reject.
    (@list [ ] [ ] [ ]) => {
        ::core::compile_error!("no arms in alternation")
    };

    // end of alternation after comma
    (@list [ ] [ ] [ $out:expr ]) => {
        $out
    };

    // end of alternation with no comma: infer one
    (@list [ ] [ $($seq:tt)+ ] [ $( $out:expr )? ]) => {
        $crate::aoc_parse_helper!(@list [,] [ $($seq)+ ] [ $( $out )? ])
    };

    // aoc_parse_helper!(@rules [rules] (pattern))
    //
    // Called after @split_rules is done to translate a rule set into Rust code.

    // With no rules, delegate to @seq.
    (@rules [] ( $( $pattern:tt )* )) => {
        $crate::aoc_parse_helper!(@seq [ $( $pattern )* ] [] [])
    };

    // With rules: first declare every rule (so that rules can refer to each
    // other, in any order), then assign each rule its parser, then build the
    // rule set around the final pattern.
    (@rules
        [ $( [ rule $name:ident : $output_ty:ty = $( $rule_pat:tt )* ] )+ ]
        ( $( $pattern:tt )* )
    ) => {
        {
            let mut builder = $crate::macros::RuleSetBuilder::new();
            $(
                let $name : $crate::macros::RuleParser<$output_ty> = builder.new_rule();
            )*
            $(
                builder.assign_parser_for_rule(
                    &$name,
                    $crate::aoc_parse_helper!(@seq [ $( $rule_pat )* ] [] [])
                );
            )*
            builder.build($crate::aoc_parse_helper!(@seq [ $( $pattern )* ] [] []))
        }
    };

    // aoc_parse_helper!(@split_rules [source tokens] [current rule holding area] [completed rules])
    //
    // Split the source tokens into rules and a final pattern.
    // Then call `aoc_parse_helper!(@rules [[rule]*] (pattern))`.

    // `rule` keyword with nothing in the holding area: start a new rule
    (@split_rules [ rule $( $tail:tt )* ] [] [ $( $out:tt )* ]) => {
        $crate::aoc_parse_helper!(
            @split_rules
                [ $( $tail )* ]
                [ rule ]
                [ $( $out )* ]
        )
    };

    // anything else with nothing in the holding area: this is the final pattern
    (@split_rules [ $( $tail:tt )+ ] [] [ $( $out:tt )* ]) => {
        $crate::aoc_parse_helper!(
            @rules
                [ $( $out )* ]
                ( $( $tail )+ )
        )
    };

    // `;` ends the current rule
    (@split_rules [ ; $( $tail:tt )* ] [ $( $rule:tt )* ] [ $( $out:tt )* ]) => {
        $crate::aoc_parse_helper!(
            @split_rules
                [ $( $tail )* ]
                []
                [ $( $out )* [ $( $rule )* ] ]
        )
    };

    // `rule` keyword while still inside a rule: the previous rule was not terminated
    (@split_rules [ rule $( $tail:tt )* ] [ $( $rule:tt )+ ] [ $( $out:tt )* ]) => {
        ::core::compile_error!(::core::stringify!(missing semicolon before: rule $($tail)*))
    };

    // any other token inside a rule: move it to the holding area
    (@split_rules [ $other:tt $( $tail:tt )* ] [ $( $rule:tt )* ] [ $( $out:tt )* ]) => {
        $crate::aoc_parse_helper!(
            @split_rules
                [ $( $tail )* ]
                [ $( $rule )* $other ]
                [ $( $out )* ]
        )
    };

    // input ran out in the middle of a rule: its terminating `;` is missing
    (@split_rules [] [ $( $rule:tt )+ ] [ $( $out:tt )* ]) => {
        ::core::compile_error!(
            ::core::concat!(
                "missing semicolon at end of rule: ",
                ::core::stringify!($($rule)*)
            )
        )
    };

    // input ran out right after a rule (or was empty): no final pattern
    (@split_rules [] [] [ $( $out:tt )* ]) => {
        ::core::compile_error!("missing final pattern (at the end of a rule set, specify which rule is the starting point for parsing)")
    };

    // aoc_parse_helper!(@...) - This is an internal error, shouldn't happen in the wild.
    (@ $($tail:tt)*) => {
        ::core::compile_error!(::core::stringify!(unrecognized syntax @ $($tail)*))
    };

    // Hand anything else off to the @split_rules submacro.
    ( $( $tail:tt )* ) => {
        $crate::macros::single_value(
            $crate::aoc_parse_helper!(@split_rules [ $($tail)* ] [ ] [ ])
        )
    };
}