Skip to main content

trampoline_parser/
parser_dsl.rs

1//! Parser DSL for defining grammar rules (scannerless parsing)
2//!
3//! # Example
4//!
5//! ```rust
6//! use trampoline_parser::Grammar;
7//!
8//! let grammar = Grammar::new()
9//!     .rule("number", |r| {
10//!         r.capture(r.one_or_more(r.digit()))
11//!     })
12//!     .rule("expr", |r| {
13//!         r.sequence((
14//!             r.parse("number"),
15//!             r.lit("+"),
16//!             r.parse("number"),
17//!         ))
18//!     });
19//! ```
20
21use crate::ir::{CharClass, Combinator, InfixOp, PostfixOp, PrattDef, PrefixOp, TernaryOp};
22use crate::Assoc;
23
24/// Builder for parser rules
25#[derive(Debug)]
26pub struct RuleBuilder {
27    #[allow(dead_code)]
28    name: String,
29}
30
31impl RuleBuilder {
32    pub fn new(name: &str) -> Self {
33        Self {
34            name: name.to_string(),
35        }
36    }
37
38    /// Reference another rule by name
39    pub fn parse(&self, rule_name: &str) -> Combinator {
40        Combinator::Rule(rule_name.to_string())
41    }
42
43    // === Character-level primitives (scannerless parsing) ===
44
45    /// Match a literal string exactly (e.g., "if", "===", "+")
46    pub fn lit(&self, s: &str) -> Combinator {
47        Combinator::Literal(s.to_string())
48    }
49
50    /// Match a single specific character
51    pub fn char(&self, c: char) -> Combinator {
52        Combinator::Char(c)
53    }
54
55    /// Match any decimal digit (0-9)
56    pub fn digit(&self) -> Combinator {
57        Combinator::CharClass(CharClass::Digit)
58    }
59
60    /// Match any hexadecimal digit (0-9, a-f, A-F)
61    pub fn hex_digit(&self) -> Combinator {
62        Combinator::CharClass(CharClass::HexDigit)
63    }
64
65    /// Match any alphabetic character (a-z, A-Z)
66    pub fn alpha(&self) -> Combinator {
67        Combinator::CharClass(CharClass::Alpha)
68    }
69
70    /// Match any alphanumeric character (a-z, A-Z, 0-9)
71    pub fn alpha_num(&self) -> Combinator {
72        Combinator::CharClass(CharClass::AlphaNumeric)
73    }
74
75    /// Match any whitespace character (space, tab, newline, etc.)
76    pub fn ws(&self) -> Combinator {
77        Combinator::CharClass(CharClass::Whitespace)
78    }
79
80    /// Match identifier start character (a-z, A-Z, _, $)
81    pub fn ident_start(&self) -> Combinator {
82        Combinator::CharClass(CharClass::IdentStart)
83    }
84
85    /// Match identifier continue character (a-z, A-Z, 0-9, _, $)
86    pub fn ident_cont(&self) -> Combinator {
87        Combinator::CharClass(CharClass::IdentCont)
88    }
89
90    /// Match any single character
91    pub fn any_char(&self) -> Combinator {
92        Combinator::AnyChar
93    }
94
95    /// Match a character in the given range (inclusive)
96    pub fn range(&self, from: char, to: char) -> Combinator {
97        Combinator::CharRange(from, to)
98    }
99
100    /// Negative lookahead: succeed if inner does NOT match, consume nothing
101    pub fn not_followed_by(&self, inner: Combinator) -> Combinator {
102        Combinator::NotFollowedBy(Box::new(inner))
103    }
104
105    /// Positive lookahead: succeed if inner matches, consume nothing
106    pub fn followed_by(&self, inner: Combinator) -> Combinator {
107        Combinator::FollowedBy(Box::new(inner))
108    }
109
110    /// Capture the matched text as a string token
111    pub fn capture(&self, inner: Combinator) -> Combinator {
112        Combinator::Capture(Box::new(inner))
113    }
114
115    /// Memoize the result of parsing to avoid exponential backtracking.
116    ///
117    /// When a memoized combinator is tried at a position, the result is cached.
118    /// If the same combinator is tried again at the same position (due to
119    /// backtracking), the cached result is returned instead of re-parsing.
120    ///
121    /// Use this for rules that:
122    /// 1. Appear in multiple Choice alternatives
123    /// 2. Contain recursion
124    /// 3. Are frequently backtracked
125    ///
126    /// The `id` parameter must be unique across all memoization points.
127    pub fn memoize(&self, id: usize, inner: Combinator) -> Combinator {
128        Combinator::Memoize {
129            id,
130            inner: Box::new(inner),
131        }
132    }
133
134    /// Sequence of combinators
135    pub fn sequence<T: IntoCombinatorsVec>(&self, items: T) -> Combinator {
136        Combinator::Sequence(items.into_combinators_vec())
137    }
138
139    /// Ordered choice (first match wins, auto-backtrack)
140    pub fn choice<T: IntoCombinatorsVec>(&self, items: T) -> Combinator {
141        Combinator::Choice(items.into_combinators_vec())
142    }
143
144    /// Zero or more
145    pub fn zero_or_more(&self, inner: Combinator) -> Combinator {
146        Combinator::ZeroOrMore(Box::new(inner))
147    }
148
149    /// One or more
150    pub fn one_or_more(&self, inner: Combinator) -> Combinator {
151        Combinator::OneOrMore(Box::new(inner))
152    }
153
154    /// Optional (zero or one)
155    pub fn optional(&self, inner: Combinator) -> Combinator {
156        Combinator::Optional(Box::new(inner))
157    }
158
159    /// Parse but discard result
160    pub fn skip(&self, inner: Combinator) -> Combinator {
161        Combinator::Skip(Box::new(inner))
162    }
163
164    /// Separated list: item (sep item)*
165    pub fn separated_by(&self, item: Combinator, separator: Combinator) -> Combinator {
166        Combinator::SeparatedBy {
167            item: Box::new(item),
168            separator: Box::new(separator),
169            trailing: false,
170        }
171    }
172
173    /// Separated list with optional trailing separator
174    pub fn separated_by_trailing(&self, item: Combinator, separator: Combinator) -> Combinator {
175        Combinator::SeparatedBy {
176            item: Box::new(item),
177            separator: Box::new(separator),
178            trailing: true,
179        }
180    }
181
182    /// Pratt expression parsing
183    pub fn pratt<F>(&self, operand: Combinator, f: F) -> Combinator
184    where
185        F: FnOnce(PrattBuilder) -> PrattBuilder,
186    {
187        let builder = PrattBuilder::new(operand);
188        let builder = f(builder);
189        Combinator::Pratt(builder.build())
190    }
191}
192
193/// Extension trait for Combinator to add AST mapping
194pub trait CombinatorExt {
195    fn ast(self, mapping: &str) -> Combinator;
196}
197
198impl CombinatorExt for Combinator {
199    /// Apply AST mapping to this combinator
200    fn ast(self, mapping: &str) -> Combinator {
201        Combinator::Mapped {
202            inner: Box::new(self),
203            mapping: mapping.to_string(),
204        }
205    }
206}
207
208/// Trait for converting tuples to Vec<Combinator>
209pub trait IntoCombinatorsVec {
210    fn into_combinators_vec(self) -> Vec<Combinator>;
211}
212
213// Implement for various tuple sizes
214impl IntoCombinatorsVec for (Combinator,) {
215    fn into_combinators_vec(self) -> Vec<Combinator> {
216        vec![self.0]
217    }
218}
219
220impl IntoCombinatorsVec for (Combinator, Combinator) {
221    fn into_combinators_vec(self) -> Vec<Combinator> {
222        vec![self.0, self.1]
223    }
224}
225
226impl IntoCombinatorsVec for (Combinator, Combinator, Combinator) {
227    fn into_combinators_vec(self) -> Vec<Combinator> {
228        vec![self.0, self.1, self.2]
229    }
230}
231
232impl IntoCombinatorsVec for (Combinator, Combinator, Combinator, Combinator) {
233    fn into_combinators_vec(self) -> Vec<Combinator> {
234        vec![self.0, self.1, self.2, self.3]
235    }
236}
237
238impl IntoCombinatorsVec for (Combinator, Combinator, Combinator, Combinator, Combinator) {
239    fn into_combinators_vec(self) -> Vec<Combinator> {
240        vec![self.0, self.1, self.2, self.3, self.4]
241    }
242}
243
244impl IntoCombinatorsVec
245    for (
246        Combinator,
247        Combinator,
248        Combinator,
249        Combinator,
250        Combinator,
251        Combinator,
252    )
253{
254    fn into_combinators_vec(self) -> Vec<Combinator> {
255        vec![self.0, self.1, self.2, self.3, self.4, self.5]
256    }
257}
258
259impl IntoCombinatorsVec
260    for (
261        Combinator,
262        Combinator,
263        Combinator,
264        Combinator,
265        Combinator,
266        Combinator,
267        Combinator,
268    )
269{
270    fn into_combinators_vec(self) -> Vec<Combinator> {
271        vec![self.0, self.1, self.2, self.3, self.4, self.5, self.6]
272    }
273}
274
275impl IntoCombinatorsVec
276    for (
277        Combinator,
278        Combinator,
279        Combinator,
280        Combinator,
281        Combinator,
282        Combinator,
283        Combinator,
284        Combinator,
285    )
286{
287    fn into_combinators_vec(self) -> Vec<Combinator> {
288        vec![
289            self.0, self.1, self.2, self.3, self.4, self.5, self.6, self.7,
290        ]
291    }
292}
293
294impl IntoCombinatorsVec
295    for (
296        Combinator,
297        Combinator,
298        Combinator,
299        Combinator,
300        Combinator,
301        Combinator,
302        Combinator,
303        Combinator,
304        Combinator,
305    )
306{
307    fn into_combinators_vec(self) -> Vec<Combinator> {
308        vec![
309            self.0, self.1, self.2, self.3, self.4, self.5, self.6, self.7, self.8,
310        ]
311    }
312}
313
314impl IntoCombinatorsVec
315    for (
316        Combinator,
317        Combinator,
318        Combinator,
319        Combinator,
320        Combinator,
321        Combinator,
322        Combinator,
323        Combinator,
324        Combinator,
325        Combinator,
326    )
327{
328    fn into_combinators_vec(self) -> Vec<Combinator> {
329        vec![
330            self.0, self.1, self.2, self.3, self.4, self.5, self.6, self.7, self.8, self.9,
331        ]
332    }
333}
334
335impl IntoCombinatorsVec
336    for (
337        Combinator,
338        Combinator,
339        Combinator,
340        Combinator,
341        Combinator,
342        Combinator,
343        Combinator,
344        Combinator,
345        Combinator,
346        Combinator,
347        Combinator,
348    )
349{
350    fn into_combinators_vec(self) -> Vec<Combinator> {
351        vec![
352            self.0, self.1, self.2, self.3, self.4, self.5, self.6, self.7, self.8, self.9, self.10,
353        ]
354    }
355}
356
357impl IntoCombinatorsVec
358    for (
359        Combinator,
360        Combinator,
361        Combinator,
362        Combinator,
363        Combinator,
364        Combinator,
365        Combinator,
366        Combinator,
367        Combinator,
368        Combinator,
369        Combinator,
370        Combinator,
371    )
372{
373    fn into_combinators_vec(self) -> Vec<Combinator> {
374        vec![
375            self.0, self.1, self.2, self.3, self.4, self.5, self.6, self.7, self.8, self.9,
376            self.10, self.11,
377        ]
378    }
379}
380
381impl IntoCombinatorsVec for Vec<Combinator> {
382    fn into_combinators_vec(self) -> Vec<Combinator> {
383        self
384    }
385}
386
387/// Builder for Pratt parsing operators
388#[derive(Debug)]
389pub struct PrattBuilder {
390    operand: Combinator,
391    prefix_ops: Vec<PrefixOp>,
392    infix_ops: Vec<InfixOp>,
393    postfix_ops: Vec<PostfixOp>,
394    ternary: Option<TernaryOp>,
395}
396
397impl PrattBuilder {
398    fn new(operand: Combinator) -> Self {
399        Self {
400            operand,
401            prefix_ops: Vec::new(),
402            infix_ops: Vec::new(),
403            postfix_ops: Vec::new(),
404            ternary: None,
405        }
406    }
407
408    /// Define a prefix operator with a pattern
409    /// Example: `ops.prefix("-", 16, "|e| unary(e, Neg)")`
410    /// Example: `ops.prefix(r.sequence((r.lit("-"), r.not_followed_by(r.lit("-")))), 16, "...")`
411    pub fn prefix(mut self, pattern: impl Into<Combinator>, precedence: u8, mapping: &str) -> Self {
412        self.prefix_ops.push(PrefixOp {
413            pattern: Box::new(pattern.into()),
414            precedence,
415            mapping: mapping.to_string(),
416        });
417        self
418    }
419
420    /// Define a prefix operator for a keyword (ensures not followed by identifier char)
421    /// Example: `ops.prefix_kw("typeof", 16, "|e| unary(e, Typeof)")`
422    pub fn prefix_kw(mut self, keyword: &str, precedence: u8, mapping: &str) -> Self {
423        self.prefix_ops.push(PrefixOp {
424            pattern: Box::new(Combinator::Sequence(vec![
425                Combinator::Literal(keyword.to_string()),
426                Combinator::NotFollowedBy(Box::new(Combinator::CharClass(CharClass::IdentCont))),
427            ])),
428            precedence,
429            mapping: mapping.to_string(),
430        });
431        self
432    }
433
434    /// Define an infix operator with a pattern
435    /// Example: `ops.infix("+", 13, Assoc::Left, "|l, r| binary(l, r, Add)")`
436    /// Example: `ops.infix(r.sequence((r.lit("-"), r.not_followed_by(r.lit("-")))), 9, Left, "...")`
437    pub fn infix(
438        mut self,
439        pattern: impl Into<Combinator>,
440        precedence: u8,
441        assoc: Assoc,
442        mapping: &str,
443    ) -> Self {
444        self.infix_ops.push(InfixOp {
445            pattern: Box::new(pattern.into()),
446            precedence,
447            assoc,
448            mapping: mapping.to_string(),
449        });
450        self
451    }
452
453    /// Define an infix operator for a keyword (ensures not followed by identifier char)
454    /// Example: `ops.infix_kw("in", 11, Assoc::Left, "|l, r| binary(l, r, In)")`
455    pub fn infix_kw(mut self, keyword: &str, precedence: u8, assoc: Assoc, mapping: &str) -> Self {
456        self.infix_ops.push(InfixOp {
457            pattern: Box::new(Combinator::Sequence(vec![
458                Combinator::Literal(keyword.to_string()),
459                Combinator::NotFollowedBy(Box::new(Combinator::CharClass(CharClass::IdentCont))),
460            ])),
461            precedence,
462            assoc,
463            mapping: mapping.to_string(),
464        });
465        self
466    }
467
468    /// Define a simple postfix operator with a pattern (++, --)
469    /// Example: `ops.postfix("++", 17, "|e| update(e, Increment, false)")`
470    pub fn postfix(
471        mut self,
472        pattern: impl Into<Combinator>,
473        precedence: u8,
474        mapping: &str,
475    ) -> Self {
476        self.postfix_ops.push(PostfixOp::Simple {
477            pattern: Box::new(pattern.into()),
478            precedence,
479            mapping: mapping.to_string(),
480        });
481        self
482    }
483
484    /// Define a call expression postfix: callee(args)
485    /// Example: `ops.postfix_call("(", ")", ",", 18, "|callee, args| call(callee, args)")`
486    pub fn postfix_call(
487        mut self,
488        open: &str,
489        close: &str,
490        separator: &str,
491        precedence: u8,
492        mapping: &str,
493    ) -> Self {
494        self.postfix_ops.push(PostfixOp::Call {
495            open: Box::new(Combinator::Literal(open.to_string())),
496            close: Box::new(Combinator::Literal(close.to_string())),
497            separator: Box::new(Combinator::Literal(separator.to_string())),
498            arg_rule: None,
499            precedence,
500            mapping: mapping.to_string(),
501        });
502        self
503    }
504
505    /// Define a call expression postfix with a custom argument rule: callee(args)
506    /// The arg_rule is used to parse each argument (e.g., to support spread)
507    /// Example: `ops.postfix_call_with_arg_rule("(", ")", ",", "call_argument", 18, "|callee, args| call(callee, args)")`
508    pub fn postfix_call_with_arg_rule(
509        mut self,
510        open: &str,
511        close: &str,
512        separator: &str,
513        arg_rule: &str,
514        precedence: u8,
515        mapping: &str,
516    ) -> Self {
517        self.postfix_ops.push(PostfixOp::Call {
518            open: Box::new(Combinator::Literal(open.to_string())),
519            close: Box::new(Combinator::Literal(close.to_string())),
520            separator: Box::new(Combinator::Literal(separator.to_string())),
521            arg_rule: Some(arg_rule.to_string()),
522            precedence,
523            mapping: mapping.to_string(),
524        });
525        self
526    }
527
528    /// Define an index expression postfix: obj[index]
529    /// Example: `ops.postfix_index("[", "]", 18, "|obj, prop| member_computed(obj, prop)")`
530    pub fn postfix_index(mut self, open: &str, close: &str, precedence: u8, mapping: &str) -> Self {
531        self.postfix_ops.push(PostfixOp::Index {
532            open: Box::new(Combinator::Literal(open.to_string())),
533            close: Box::new(Combinator::Literal(close.to_string())),
534            precedence,
535            mapping: mapping.to_string(),
536        });
537        self
538    }
539
540    /// Define a member access postfix: obj.prop
541    /// Example: `ops.postfix_member(".", 18, "|obj, prop| member(obj, prop)")`
542    pub fn postfix_member(mut self, literal: &str, precedence: u8, mapping: &str) -> Self {
543        self.postfix_ops.push(PostfixOp::Member {
544            pattern: Box::new(Combinator::Literal(literal.to_string())),
545            precedence,
546            mapping: mapping.to_string(),
547        });
548        self
549    }
550
551    /// Define a member access postfix with a custom pattern
552    /// Use this when you need not_followed_by constraints
553    /// Example: `ops.postfix_member_pattern(r.sequence((r.lit("."), r.not_followed_by(r.char('.')))), 18, "|obj, prop| member(obj, prop)")`
554    pub fn postfix_member_pattern(
555        mut self,
556        pattern: Combinator,
557        precedence: u8,
558        mapping: &str,
559    ) -> Self {
560        self.postfix_ops.push(PostfixOp::Member {
561            pattern: Box::new(pattern),
562            precedence,
563            mapping: mapping.to_string(),
564        });
565        self
566    }
567
568    /// Define a rule-based postfix: parses another rule as the suffix
569    /// Used for tagged template literals: tag`template`
570    /// Example: `ops.postfix_rule("template_literal", 18, "|tag, template| tagged_template(tag, template)")`
571    pub fn postfix_rule(mut self, rule_name: &str, precedence: u8, mapping: &str) -> Self {
572        self.postfix_ops.push(PostfixOp::Rule {
573            rule_name: rule_name.to_string(),
574            precedence,
575            mapping: mapping.to_string(),
576        });
577        self
578    }
579
580    /// Define a ternary operator: cond ? then : else
581    /// Example: `ops.ternary("?", ":", 3, "|c, t, f| conditional(c, t, f)")`
582    pub fn ternary(mut self, first: &str, second: &str, precedence: u8, mapping: &str) -> Self {
583        self.ternary = Some(TernaryOp {
584            first: Box::new(Combinator::Literal(first.to_string())),
585            second: Box::new(Combinator::Literal(second.to_string())),
586            precedence,
587            mapping: mapping.to_string(),
588        });
589        self
590    }
591
592    fn build(self) -> PrattDef {
593        PrattDef {
594            operand: Box::new(Some(self.operand)),
595            prefix_ops: self.prefix_ops,
596            infix_ops: self.infix_ops,
597            postfix_ops: self.postfix_ops,
598            ternary: self.ternary,
599        }
600    }
601}
602
#[cfg(test)]
mod tests {
    use super::*;

    /// `sequence` and `choice` produce the matching combinator variants.
    #[test]
    fn test_basic_combinators() {
        let r = RuleBuilder::new("test");

        let pair_seq = r.sequence((r.lit("a"), r.lit("b")));
        assert!(matches!(pair_seq, Combinator::Sequence(_)));

        let pair_choice = r.choice((r.lit("a"), r.lit("b")));
        assert!(matches!(pair_choice, Combinator::Choice(_)));
    }

    /// Each character-level primitive maps to its expected variant.
    #[test]
    fn test_char_level_primitives() {
        let r = RuleBuilder::new("test");

        // Literal string
        let literal = r.lit("hello");
        assert!(matches!(literal, Combinator::Literal(_)));

        // Single character
        let single = r.char('x');
        assert!(matches!(single, Combinator::Char('x')));

        // Character classes
        let digit = r.digit();
        assert!(matches!(digit, Combinator::CharClass(CharClass::Digit)));
        let alpha = r.alpha();
        assert!(matches!(alpha, Combinator::CharClass(CharClass::Alpha)));
        let ident_start = r.ident_start();
        assert!(matches!(
            ident_start,
            Combinator::CharClass(CharClass::IdentStart)
        ));

        // Inclusive character range
        let lowercase = r.range('a', 'z');
        assert!(matches!(lowercase, Combinator::CharRange('a', 'z')));

        // Any character
        let any = r.any_char();
        assert!(matches!(any, Combinator::AnyChar));

        // Capture
        let captured = r.capture(r.digit());
        assert!(matches!(captured, Combinator::Capture(_)));

        // Negative lookahead
        let lookahead = r.not_followed_by(r.digit());
        assert!(matches!(lookahead, Combinator::NotFollowedBy(_)));
    }

    /// A Pratt definition with prefix, infix and postfix operators builds
    /// into `Combinator::Pratt`.
    #[test]
    fn test_pratt_builder() {
        let r = RuleBuilder::new("expr");
        let operand = r.parse("primary");

        let expr = r.pratt(operand, |ops| {
            let ops = ops.prefix("-", 10, "|e| Expr::Neg(e)");
            let ops = ops.infix("+", 5, Assoc::Left, "|l, r| Expr::Add(l, r)");
            ops.postfix("++", 15, "|e| Expr::PostInc(e)")
        });

        assert!(matches!(expr, Combinator::Pratt(_)));
    }

    /// `.ast(...)` wraps a combinator in `Combinator::Mapped`.
    #[test]
    fn test_ast_mapping() {
        let r = RuleBuilder::new("test");

        let pair = r.sequence((r.lit("a"), r.lit("b")));
        let with_ast = pair.ast("|(a, b)| Node { a, b }");

        assert!(matches!(with_ast, Combinator::Mapped { .. }));
    }

    /// `memoize` keeps the supplied id.
    #[test]
    fn test_memoize() {
        let r = RuleBuilder::new("test");

        let target = r.parse("expensive_rule");
        let cached = r.memoize(0, target);

        assert!(matches!(cached, Combinator::Memoize { id: 0, .. }));
    }
}