Skip to main content

trampoline_parser/
parser_dsl.rs

//! Parser DSL for defining grammar rules (scannerless parsing)
//!
//! # Example
//!
//! ```rust
//! use trampoline_parser::Grammar;
//!
//! let grammar = Grammar::new()
//!     .rule("number", |r| {
//!         r.capture(r.one_or_more(r.digit()))
//!     })
//!     .rule("expr", |r| {
//!         r.sequence((
//!             r.parse("number"),
//!             r.lit("+"),
//!             r.parse("number"),
//!         ))
//!     });
//! ```
20
21use crate::Assoc;
22use crate::ir::{CharClass, Combinator, InfixOp, PostfixOp, PrattDef, PrefixOp, TernaryOp};
23use proc_macro2::TokenStream;
24
25/// Builder for parser rules
26#[derive(Debug)]
27pub struct RuleBuilder {
28    #[allow(dead_code)]
29    name: String,
30}
31
32impl RuleBuilder {
33    pub fn new(name: &str) -> Self {
34        Self {
35            name: name.to_string(),
36        }
37    }
38
39    /// Reference another rule by name
40    pub fn parse(&self, rule_name: &str) -> Combinator {
41        Combinator::Rule(rule_name.to_string())
42    }
43
44    // === Character-level primitives (scannerless parsing) ===
45
46    /// Match a literal string exactly (e.g., "if", "===", "+")
47    pub fn lit(&self, s: &str) -> Combinator {
48        Combinator::Literal(s.to_string())
49    }
50
51    /// Match a single specific character
52    pub fn char(&self, c: char) -> Combinator {
53        Combinator::Char(c)
54    }
55
56    /// Match any decimal digit (0-9)
57    pub fn digit(&self) -> Combinator {
58        Combinator::CharClass(CharClass::Digit)
59    }
60
61    /// Match any hexadecimal digit (0-9, a-f, A-F)
62    pub fn hex_digit(&self) -> Combinator {
63        Combinator::CharClass(CharClass::HexDigit)
64    }
65
66    /// Match any alphabetic character (a-z, A-Z)
67    pub fn alpha(&self) -> Combinator {
68        Combinator::CharClass(CharClass::Alpha)
69    }
70
71    /// Match any alphanumeric character (a-z, A-Z, 0-9)
72    pub fn alpha_num(&self) -> Combinator {
73        Combinator::CharClass(CharClass::AlphaNumeric)
74    }
75
76    /// Match any whitespace character (space, tab, newline, etc.)
77    pub fn ws(&self) -> Combinator {
78        Combinator::CharClass(CharClass::Whitespace)
79    }
80
81    /// Match identifier start character (a-z, A-Z, _, $)
82    pub fn ident_start(&self) -> Combinator {
83        Combinator::CharClass(CharClass::IdentStart)
84    }
85
86    /// Match identifier continue character (a-z, A-Z, 0-9, _, $)
87    pub fn ident_cont(&self) -> Combinator {
88        Combinator::CharClass(CharClass::IdentCont)
89    }
90
91    /// Match any single character
92    pub fn any_char(&self) -> Combinator {
93        Combinator::AnyChar
94    }
95
96    /// Match a character in the given range (inclusive)
97    pub fn range(&self, from: char, to: char) -> Combinator {
98        Combinator::CharRange(from, to)
99    }
100
101    /// Negative lookahead: succeed if inner does NOT match, consume nothing
102    pub fn not_followed_by(&self, inner: Combinator) -> Combinator {
103        Combinator::NotFollowedBy(Box::new(inner))
104    }
105
106    /// Positive lookahead: succeed if inner matches, consume nothing
107    pub fn followed_by(&self, inner: Combinator) -> Combinator {
108        Combinator::FollowedBy(Box::new(inner))
109    }
110
111    /// Capture the matched text as a string token
112    pub fn capture(&self, inner: Combinator) -> Combinator {
113        Combinator::Capture(Box::new(inner))
114    }
115
116    /// Memoize the result of parsing to avoid exponential backtracking.
117    ///
118    /// When a memoized combinator is tried at a position, the result is cached.
119    /// If the same combinator is tried again at the same position (due to
120    /// backtracking), the cached result is returned instead of re-parsing.
121    ///
122    /// Use this for rules that:
123    /// 1. Appear in multiple Choice alternatives
124    /// 2. Contain recursion
125    /// 3. Are frequently backtracked
126    ///
127    /// The `id` parameter must be unique across all memoization points.
128    pub fn memoize(&self, id: usize, inner: Combinator) -> Combinator {
129        Combinator::Memoize {
130            id,
131            inner: Box::new(inner),
132        }
133    }
134
135    /// Sequence of combinators
136    pub fn sequence<T: IntoCombinatorsVec>(&self, items: T) -> Combinator {
137        Combinator::Sequence(items.into_combinators_vec())
138    }
139
140    /// Ordered choice (first match wins, auto-backtrack)
141    pub fn choice<T: IntoCombinatorsVec>(&self, items: T) -> Combinator {
142        Combinator::Choice(items.into_combinators_vec())
143    }
144
145    /// Zero or more
146    pub fn zero_or_more(&self, inner: Combinator) -> Combinator {
147        Combinator::ZeroOrMore(Box::new(inner))
148    }
149
150    /// One or more
151    pub fn one_or_more(&self, inner: Combinator) -> Combinator {
152        Combinator::OneOrMore(Box::new(inner))
153    }
154
155    /// Optional (zero or one)
156    pub fn optional(&self, inner: Combinator) -> Combinator {
157        Combinator::Optional(Box::new(inner))
158    }
159
160    /// Parse but discard result
161    pub fn skip(&self, inner: Combinator) -> Combinator {
162        Combinator::Skip(Box::new(inner))
163    }
164
165    /// Separated list: item (sep item)*
166    pub fn separated_by(&self, item: Combinator, separator: Combinator) -> Combinator {
167        Combinator::SeparatedBy {
168            item: Box::new(item),
169            separator: Box::new(separator),
170            trailing: false,
171        }
172    }
173
174    /// Separated list with optional trailing separator
175    pub fn separated_by_trailing(&self, item: Combinator, separator: Combinator) -> Combinator {
176        Combinator::SeparatedBy {
177            item: Box::new(item),
178            separator: Box::new(separator),
179            trailing: true,
180        }
181    }
182
183    /// Pratt expression parsing
184    pub fn pratt<F>(&self, operand: Combinator, f: F) -> Combinator
185    where
186        F: FnOnce(PrattBuilder) -> PrattBuilder,
187    {
188        let builder = PrattBuilder::new(operand);
189        let builder = f(builder);
190        Combinator::Pratt(builder.build())
191    }
192}
193
194/// Extension trait for Combinator to add AST mapping
195pub trait CombinatorExt {
196    fn ast(self, mapping: TokenStream) -> Combinator;
197}
198
199impl CombinatorExt for Combinator {
200    /// Apply AST mapping to this combinator
201    fn ast(self, mapping: TokenStream) -> Combinator {
202        Combinator::Mapped {
203            inner: Box::new(self),
204            mapping: mapping.to_string(),
205        }
206    }
207}
208
209/// Trait for converting tuples to Vec<Combinator>
210pub trait IntoCombinatorsVec {
211    fn into_combinators_vec(self) -> Vec<Combinator>;
212}
213
214// Implement for various tuple sizes
215impl IntoCombinatorsVec for (Combinator,) {
216    fn into_combinators_vec(self) -> Vec<Combinator> {
217        vec![self.0]
218    }
219}
220
221impl IntoCombinatorsVec for (Combinator, Combinator) {
222    fn into_combinators_vec(self) -> Vec<Combinator> {
223        vec![self.0, self.1]
224    }
225}
226
227impl IntoCombinatorsVec for (Combinator, Combinator, Combinator) {
228    fn into_combinators_vec(self) -> Vec<Combinator> {
229        vec![self.0, self.1, self.2]
230    }
231}
232
233impl IntoCombinatorsVec for (Combinator, Combinator, Combinator, Combinator) {
234    fn into_combinators_vec(self) -> Vec<Combinator> {
235        vec![self.0, self.1, self.2, self.3]
236    }
237}
238
239impl IntoCombinatorsVec for (Combinator, Combinator, Combinator, Combinator, Combinator) {
240    fn into_combinators_vec(self) -> Vec<Combinator> {
241        vec![self.0, self.1, self.2, self.3, self.4]
242    }
243}
244
245impl IntoCombinatorsVec
246    for (
247        Combinator,
248        Combinator,
249        Combinator,
250        Combinator,
251        Combinator,
252        Combinator,
253    )
254{
255    fn into_combinators_vec(self) -> Vec<Combinator> {
256        vec![self.0, self.1, self.2, self.3, self.4, self.5]
257    }
258}
259
260impl IntoCombinatorsVec
261    for (
262        Combinator,
263        Combinator,
264        Combinator,
265        Combinator,
266        Combinator,
267        Combinator,
268        Combinator,
269    )
270{
271    fn into_combinators_vec(self) -> Vec<Combinator> {
272        vec![self.0, self.1, self.2, self.3, self.4, self.5, self.6]
273    }
274}
275
276impl IntoCombinatorsVec
277    for (
278        Combinator,
279        Combinator,
280        Combinator,
281        Combinator,
282        Combinator,
283        Combinator,
284        Combinator,
285        Combinator,
286    )
287{
288    fn into_combinators_vec(self) -> Vec<Combinator> {
289        vec![
290            self.0, self.1, self.2, self.3, self.4, self.5, self.6, self.7,
291        ]
292    }
293}
294
295impl IntoCombinatorsVec
296    for (
297        Combinator,
298        Combinator,
299        Combinator,
300        Combinator,
301        Combinator,
302        Combinator,
303        Combinator,
304        Combinator,
305        Combinator,
306    )
307{
308    fn into_combinators_vec(self) -> Vec<Combinator> {
309        vec![
310            self.0, self.1, self.2, self.3, self.4, self.5, self.6, self.7, self.8,
311        ]
312    }
313}
314
315impl IntoCombinatorsVec
316    for (
317        Combinator,
318        Combinator,
319        Combinator,
320        Combinator,
321        Combinator,
322        Combinator,
323        Combinator,
324        Combinator,
325        Combinator,
326        Combinator,
327    )
328{
329    fn into_combinators_vec(self) -> Vec<Combinator> {
330        vec![
331            self.0, self.1, self.2, self.3, self.4, self.5, self.6, self.7, self.8, self.9,
332        ]
333    }
334}
335
336impl IntoCombinatorsVec
337    for (
338        Combinator,
339        Combinator,
340        Combinator,
341        Combinator,
342        Combinator,
343        Combinator,
344        Combinator,
345        Combinator,
346        Combinator,
347        Combinator,
348        Combinator,
349    )
350{
351    fn into_combinators_vec(self) -> Vec<Combinator> {
352        vec![
353            self.0, self.1, self.2, self.3, self.4, self.5, self.6, self.7, self.8, self.9, self.10,
354        ]
355    }
356}
357
358impl IntoCombinatorsVec
359    for (
360        Combinator,
361        Combinator,
362        Combinator,
363        Combinator,
364        Combinator,
365        Combinator,
366        Combinator,
367        Combinator,
368        Combinator,
369        Combinator,
370        Combinator,
371        Combinator,
372    )
373{
374    fn into_combinators_vec(self) -> Vec<Combinator> {
375        vec![
376            self.0, self.1, self.2, self.3, self.4, self.5, self.6, self.7, self.8, self.9,
377            self.10, self.11,
378        ]
379    }
380}
381
382impl IntoCombinatorsVec for Vec<Combinator> {
383    fn into_combinators_vec(self) -> Vec<Combinator> {
384        self
385    }
386}
387
388/// Builder for Pratt parsing operators
389#[derive(Debug)]
390pub struct PrattBuilder {
391    operand: Combinator,
392    prefix_ops: Vec<PrefixOp>,
393    infix_ops: Vec<InfixOp>,
394    postfix_ops: Vec<PostfixOp>,
395    ternary: Option<TernaryOp>,
396}
397
398impl PrattBuilder {
399    fn new(operand: Combinator) -> Self {
400        Self {
401            operand,
402            prefix_ops: Vec::new(),
403            infix_ops: Vec::new(),
404            postfix_ops: Vec::new(),
405            ternary: None,
406        }
407    }
408
409    /// Define a prefix operator with a pattern
410    /// Example: `ops.prefix("-", 16, "|e| unary(e, Neg)")`
411    /// Example: `ops.prefix(r.sequence((r.lit("-"), r.not_followed_by(r.lit("-")))), 16, "...")`
412    pub fn prefix(
413        mut self,
414        pattern: impl Into<Combinator>,
415        precedence: u8,
416        mapping: TokenStream,
417    ) -> Self {
418        self.prefix_ops.push(PrefixOp {
419            pattern: Box::new(pattern.into()),
420            precedence,
421            mapping: mapping.to_string(),
422        });
423        self
424    }
425
426    /// Define a prefix operator for a keyword (ensures not followed by identifier char)
427    /// Example: `ops.prefix_kw("typeof", 16, "|e| unary(e, Typeof)")`
428    pub fn prefix_kw(mut self, keyword: &str, precedence: u8, mapping: TokenStream) -> Self {
429        self.prefix_ops.push(PrefixOp {
430            pattern: Box::new(Combinator::Sequence(vec![
431                Combinator::Literal(keyword.to_string()),
432                Combinator::NotFollowedBy(Box::new(Combinator::CharClass(CharClass::IdentCont))),
433            ])),
434            precedence,
435            mapping: mapping.to_string(),
436        });
437        self
438    }
439
440    /// Define an infix operator with a pattern
441    /// Example: `ops.infix("+", 13, Assoc::Left, "|l, r| binary(l, r, Add)")`
442    /// Example: `ops.infix(r.sequence((r.lit("-"), r.not_followed_by(r.lit("-")))), 9, Left, "...")`
443    pub fn infix(
444        mut self,
445        pattern: impl Into<Combinator>,
446        precedence: u8,
447        assoc: Assoc,
448        mapping: TokenStream,
449    ) -> Self {
450        self.infix_ops.push(InfixOp {
451            pattern: Box::new(pattern.into()),
452            precedence,
453            assoc,
454            mapping: mapping.to_string(),
455        });
456        self
457    }
458
459    /// Define an infix operator for a keyword (ensures not followed by identifier char)
460    /// Example: `ops.infix_kw("in", 11, Assoc::Left, "|l, r| binary(l, r, In)")`
461    pub fn infix_kw(
462        mut self,
463        keyword: &str,
464        precedence: u8,
465        assoc: Assoc,
466        mapping: TokenStream,
467    ) -> Self {
468        self.infix_ops.push(InfixOp {
469            pattern: Box::new(Combinator::Sequence(vec![
470                Combinator::Literal(keyword.to_string()),
471                Combinator::NotFollowedBy(Box::new(Combinator::CharClass(CharClass::IdentCont))),
472            ])),
473            precedence,
474            assoc,
475            mapping: mapping.to_string(),
476        });
477        self
478    }
479
480    /// Define a simple postfix operator with a pattern (++, --)
481    /// Example: `ops.postfix("++", 17, "|e| update(e, Increment, false)")`
482    pub fn postfix(
483        mut self,
484        pattern: impl Into<Combinator>,
485        precedence: u8,
486        mapping: TokenStream,
487    ) -> Self {
488        self.postfix_ops.push(PostfixOp::Simple {
489            pattern: Box::new(pattern.into()),
490            precedence,
491            mapping: mapping.to_string(),
492        });
493        self
494    }
495
496    /// Define a call expression postfix: callee(args)
497    /// Example: `ops.postfix_call("(", ")", ",", 18, "|callee, args| call(callee, args)")`
498    pub fn postfix_call(
499        mut self,
500        open: &str,
501        close: &str,
502        separator: &str,
503        precedence: u8,
504        mapping: TokenStream,
505    ) -> Self {
506        self.postfix_ops.push(PostfixOp::Call {
507            open: Box::new(Combinator::Literal(open.to_string())),
508            close: Box::new(Combinator::Literal(close.to_string())),
509            separator: Box::new(Combinator::Literal(separator.to_string())),
510            arg_rule: None,
511            precedence,
512            mapping: mapping.to_string(),
513        });
514        self
515    }
516
517    /// Define a call expression postfix with a custom argument rule: callee(args)
518    /// The arg_rule is used to parse each argument (e.g., to support spread)
519    /// Example: `ops.postfix_call_with_arg_rule("(", ")", ",", "call_argument", 18, "|callee, args| call(callee, args)")`
520    pub fn postfix_call_with_arg_rule(
521        mut self,
522        open: &str,
523        close: &str,
524        separator: &str,
525        arg_rule: &str,
526        precedence: u8,
527        mapping: TokenStream,
528    ) -> Self {
529        self.postfix_ops.push(PostfixOp::Call {
530            open: Box::new(Combinator::Literal(open.to_string())),
531            close: Box::new(Combinator::Literal(close.to_string())),
532            separator: Box::new(Combinator::Literal(separator.to_string())),
533            arg_rule: Some(arg_rule.to_string()),
534            precedence,
535            mapping: mapping.to_string(),
536        });
537        self
538    }
539
540    /// Define an index expression postfix: obj[index]
541    /// Example: `ops.postfix_index("[", "]", 18, "|obj, prop| member_computed(obj, prop)")`
542    pub fn postfix_index(
543        mut self,
544        open: &str,
545        close: &str,
546        precedence: u8,
547        mapping: TokenStream,
548    ) -> Self {
549        self.postfix_ops.push(PostfixOp::Index {
550            open: Box::new(Combinator::Literal(open.to_string())),
551            close: Box::new(Combinator::Literal(close.to_string())),
552            precedence,
553            mapping: mapping.to_string(),
554        });
555        self
556    }
557
558    /// Define a member access postfix: obj.prop
559    /// Example: `ops.postfix_member(".", 18, "|obj, prop| member(obj, prop)")`
560    pub fn postfix_member(mut self, literal: &str, precedence: u8, mapping: TokenStream) -> Self {
561        self.postfix_ops.push(PostfixOp::Member {
562            pattern: Box::new(Combinator::Literal(literal.to_string())),
563            precedence,
564            mapping: mapping.to_string(),
565        });
566        self
567    }
568
569    /// Define a member access postfix with a custom pattern
570    /// Use this when you need not_followed_by constraints
571    /// Example: `ops.postfix_member_pattern(r.sequence((r.lit("."), r.not_followed_by(r.char('.')))), 18, "|obj, prop| member(obj, prop)")`
572    pub fn postfix_member_pattern(
573        mut self,
574        pattern: Combinator,
575        precedence: u8,
576        mapping: TokenStream,
577    ) -> Self {
578        self.postfix_ops.push(PostfixOp::Member {
579            pattern: Box::new(pattern),
580            precedence,
581            mapping: mapping.to_string(),
582        });
583        self
584    }
585
586    /// Define a rule-based postfix: parses another rule as the suffix
587    /// Used for tagged template literals: tag`template`
588    /// Example: `ops.postfix_rule("template_literal", 18, "|tag, template| tagged_template(tag, template)")`
589    pub fn postfix_rule(mut self, rule_name: &str, precedence: u8, mapping: TokenStream) -> Self {
590        self.postfix_ops.push(PostfixOp::Rule {
591            rule_name: rule_name.to_string(),
592            precedence,
593            mapping: mapping.to_string(),
594        });
595        self
596    }
597
598    /// Define a ternary operator: cond ? then : else
599    /// Example: `ops.ternary("?", ":", 3, "|c, t, f| conditional(c, t, f)")`
600    pub fn ternary(
601        mut self,
602        first: &str,
603        second: &str,
604        precedence: u8,
605        mapping: TokenStream,
606    ) -> Self {
607        self.ternary = Some(TernaryOp {
608            first: Box::new(Combinator::Literal(first.to_string())),
609            second: Box::new(Combinator::Literal(second.to_string())),
610            precedence,
611            mapping: mapping.to_string(),
612        });
613        self
614    }
615
616    fn build(self) -> PrattDef {
617        PrattDef {
618            operand: Box::new(Some(self.operand)),
619            prefix_ops: self.prefix_ops,
620            infix_ops: self.infix_ops,
621            postfix_ops: self.postfix_ops,
622            ternary: self.ternary,
623        }
624    }
625}
626
#[cfg(test)]
mod tests {
    use super::*;
    use quote::quote;

    /// True when `pattern` is `Sequence([Literal(keyword), NotFollowedBy(IdentCont)])`,
    /// the shape produced by the `*_kw` operator methods.
    fn is_keyword_pattern(pattern: &Combinator, keyword: &str) -> bool {
        match pattern {
            Combinator::Sequence(parts) => matches!(
                parts.as_slice(),
                [Combinator::Literal(text), Combinator::NotFollowedBy(guard)]
                    if text == keyword
                        && matches!(**guard, Combinator::CharClass(CharClass::IdentCont))
            ),
            _ => false,
        }
    }

    #[test]
    fn test_basic_combinators() {
        let builder = RuleBuilder::new("test");

        // Check the collected items too, not only the outer variant.
        let seq = builder.sequence((builder.lit("a"), builder.lit("b")));
        assert!(matches!(&seq, Combinator::Sequence(items) if items.len() == 2));

        let choice = builder.choice((builder.lit("a"), builder.lit("b")));
        assert!(matches!(&choice, Combinator::Choice(items) if items.len() == 2));

        assert!(matches!(
            &builder.parse("other"),
            Combinator::Rule(name) if name == "other"
        ));
    }

    #[test]
    fn test_char_level_primitives() {
        let builder = RuleBuilder::new("test");

        // Test literal (including the stored text)
        assert!(matches!(
            &builder.lit("hello"),
            Combinator::Literal(text) if text == "hello"
        ));

        // Test char
        assert!(matches!(builder.char('x'), Combinator::Char('x')));

        // Test character classes
        assert!(matches!(
            builder.digit(),
            Combinator::CharClass(CharClass::Digit)
        ));
        assert!(matches!(
            builder.hex_digit(),
            Combinator::CharClass(CharClass::HexDigit)
        ));
        assert!(matches!(
            builder.alpha(),
            Combinator::CharClass(CharClass::Alpha)
        ));
        assert!(matches!(
            builder.alpha_num(),
            Combinator::CharClass(CharClass::AlphaNumeric)
        ));
        assert!(matches!(
            builder.ws(),
            Combinator::CharClass(CharClass::Whitespace)
        ));
        assert!(matches!(
            builder.ident_start(),
            Combinator::CharClass(CharClass::IdentStart)
        ));
        assert!(matches!(
            builder.ident_cont(),
            Combinator::CharClass(CharClass::IdentCont)
        ));

        // Test range
        assert!(matches!(
            builder.range('a', 'z'),
            Combinator::CharRange('a', 'z')
        ));

        // Test any_char
        assert!(matches!(builder.any_char(), Combinator::AnyChar));

        // Test capture
        assert!(matches!(
            builder.capture(builder.digit()),
            Combinator::Capture(_)
        ));

        // Test lookahead (both polarities)
        assert!(matches!(
            builder.not_followed_by(builder.digit()),
            Combinator::NotFollowedBy(_)
        ));
        assert!(matches!(
            builder.followed_by(builder.digit()),
            Combinator::FollowedBy(_)
        ));
    }

    #[test]
    fn test_separated_by_trailing_flag() {
        let builder = RuleBuilder::new("test");

        assert!(matches!(
            builder.separated_by(builder.digit(), builder.lit(",")),
            Combinator::SeparatedBy {
                trailing: false,
                ..
            }
        ));
        assert!(matches!(
            builder.separated_by_trailing(builder.digit(), builder.lit(",")),
            Combinator::SeparatedBy { trailing: true, .. }
        ));
    }

    #[test]
    fn test_into_combinators_vec() {
        let b = RuleBuilder::new("test");

        // Smallest and largest supported tuple arities, plus the Vec passthrough.
        assert_eq!((b.lit("a"),).into_combinators_vec().len(), 1);
        let twelve = (
            b.lit("0"),
            b.lit("1"),
            b.lit("2"),
            b.lit("3"),
            b.lit("4"),
            b.lit("5"),
            b.lit("6"),
            b.lit("7"),
            b.lit("8"),
            b.lit("9"),
            b.lit("10"),
            b.lit("11"),
        );
        let items = twelve.into_combinators_vec();
        assert_eq!(items.len(), 12);
        assert!(matches!(&items[0], Combinator::Literal(text) if text == "0"));
        assert!(matches!(&items[11], Combinator::Literal(text) if text == "11"));
        assert_eq!(vec![b.lit("a"), b.lit("b")].into_combinators_vec().len(), 2);
    }

    #[test]
    fn test_pratt_builder() {
        let builder = RuleBuilder::new("expr");

        let pratt = builder.pratt(builder.parse("primary"), |ops| {
            ops.prefix("-", 10, quote!(|e| Expr::Neg(e)))
                .infix("+", 5, Assoc::Left, quote!(|l, r| Expr::Add(l, r)))
                .postfix("++", 15, quote!(|e| Expr::PostInc(e)))
        });

        match pratt {
            Combinator::Pratt(def) => {
                // Every registered operator must reach the definition.
                assert_eq!(def.prefix_ops.len(), 1);
                assert_eq!(def.infix_ops.len(), 1);
                assert_eq!(def.postfix_ops.len(), 1);
                assert!(def.ternary.is_none());
                assert!(matches!(
                    &*def.operand,
                    Some(Combinator::Rule(name)) if name == "primary"
                ));
                assert_eq!(def.prefix_ops[0].precedence, 10);
                assert_eq!(def.infix_ops[0].precedence, 5);
            }
            other => panic!("expected Combinator::Pratt, got {:?}", other),
        }
    }

    #[test]
    fn test_pratt_keyword_and_ternary() {
        let builder = RuleBuilder::new("expr");

        let pratt = builder.pratt(builder.parse("primary"), |ops| {
            ops.prefix_kw("typeof", 16, quote!(|e| Expr::Typeof(e)))
                .infix_kw("in", 11, Assoc::Left, quote!(|l, r| Expr::In(l, r)))
                .ternary("?", ":", 3, quote!(|c, t, f| Expr::Cond(c, t, f)))
        });

        match pratt {
            Combinator::Pratt(def) => {
                assert_eq!(def.prefix_ops.len(), 1);
                assert!(is_keyword_pattern(&def.prefix_ops[0].pattern, "typeof"));
                assert_eq!(def.infix_ops.len(), 1);
                assert!(is_keyword_pattern(&def.infix_ops[0].pattern, "in"));
                assert!(def.ternary.is_some());
            }
            other => panic!("expected Combinator::Pratt, got {:?}", other),
        }
    }

    #[test]
    fn test_ast_mapping() {
        let builder = RuleBuilder::new("test");

        let mapped = builder
            .sequence((builder.lit("a"), builder.lit("b")))
            .ast(quote!(|(a, b)| Node { a, b }));

        // The wrapped combinator must be the original sequence and the mapping
        // text must have been recorded.
        assert!(matches!(
            &mapped,
            Combinator::Mapped { inner, mapping }
                if matches!(**inner, Combinator::Sequence(_)) && !mapping.is_empty()
        ));
    }

    #[test]
    fn test_memoize() {
        let builder = RuleBuilder::new("test");

        let memoized = builder.memoize(0, builder.parse("expensive_rule"));

        assert!(matches!(
            &memoized,
            Combinator::Memoize { id: 0, inner } if matches!(**inner, Combinator::Rule(_))
        ));
    }
}