Skip to main content

trampoline_parser/
lib.rs

1//! Trampoline Parser Generator
2//!
3//! A DSL for generating fully trampoline-based scannerless parsers.
4//!
5//! # Example
6//!
7//! ```rust
8//! use trampoline_parser::Grammar;
9//!
10//! let grammar = Grammar::new()
11//!     .rule("number", |r| {
12//!         r.capture(r.one_or_more(r.digit()))
13//!     })
14//!     .rule("expr", |r| {
15//!         r.sequence((
16//!             r.parse("number"),
17//!             r.lit("+"),
18//!             r.parse("number"),
19//!         ))
20//!     })
21//!     .build();
22//!
23//! let code = grammar.generate();
24//! ```
25
26mod codegen;
27pub mod grammars;
28mod ir;
29mod parser_dsl;
30pub mod prefix_factoring;
31mod validation;
32
33pub use codegen::*;
34pub use ir::*;
35pub use parser_dsl::*;
36pub use prefix_factoring::{
37    identify_memoization_candidates, BacktrackingSeverity, BacktrackingWarning,
38};
39pub use validation::{validate_grammar, ValidationError};
40
41/// Builder for AST configuration
42#[derive(Debug, Default)]
43pub struct AstConfigBuilder {
44    config: AstConfig,
45}
46
47impl AstConfigBuilder {
48    pub fn new() -> Self {
49        Self::default()
50    }
51
52    /// Add an import statement (e.g., "crate::ast::*")
53    pub fn import(mut self, import_path: &str) -> Self {
54        self.config.imports.push(import_path.to_string());
55        self
56    }
57
58    /// Set the return type of the parse() function
59    pub fn result_type(mut self, result_type: &str) -> Self {
60        self.config.result_type = Some(result_type.to_string());
61        self
62    }
63
64    /// Set the external span type (disables internal Span generation)
65    pub fn span_type(mut self, span_type: &str) -> Self {
66        self.config.span_type = Some(span_type.to_string());
67        self.config.generate_span = false;
68        self
69    }
70
71    /// Set the external string type (e.g., "JsString")
72    pub fn string_type(mut self, string_type: &str) -> Self {
73        self.config.string_type = Some(string_type.to_string());
74        self
75    }
76
77    /// Set the external error type (disables internal ParseError generation)
78    pub fn error_type(mut self, error_type: &str) -> Self {
79        self.config.error_type = Some(error_type.to_string());
80        self.config.generate_parse_error = false;
81        self
82    }
83
84    /// Disable ParseResult enum generation
85    pub fn no_parse_result(mut self) -> Self {
86        self.config.generate_parse_result = false;
87        self
88    }
89
90    /// Enable AST mapping application
91    pub fn apply_mappings(mut self) -> Self {
92        self.config.apply_mappings = true;
93        self
94    }
95
96    /// Enable string dictionary integration for string interning
97    pub fn string_dict(mut self, dict_type: &str) -> Self {
98        self.config.string_dict_type = Some(dict_type.to_string());
99        self
100    }
101
102    /// Set custom method name for string interning (default: "get_or_insert")
103    pub fn string_dict_method(mut self, method: &str) -> Self {
104        self.config.string_dict_method = Some(method.to_string());
105        self
106    }
107
108    /// Add helper code (functions, constants) to be included in the generated parser
109    pub fn helper(mut self, code: &str) -> Self {
110        self.config.helper_code.push(code.to_string());
111        self
112    }
113
114    /// Add a custom ParseResult variant for typed AST nodes
115    pub fn result_variant(mut self, name: &str, rust_type: &str) -> Self {
116        self.config.result_variants.push(ir::ResultVariant {
117            name: name.to_string(),
118            rust_type: rust_type.to_string(),
119            span_expr: None,
120        });
121        self
122    }
123
124    /// Add a custom ParseResult variant with custom span extraction
125    pub fn result_variant_with_span(
126        mut self,
127        name: &str,
128        rust_type: &str,
129        span_expr: &str,
130    ) -> Self {
131        self.config.result_variants.push(ir::ResultVariant {
132            name: name.to_string(),
133            rust_type: rust_type.to_string(),
134            span_expr: Some(span_expr.to_string()),
135        });
136        self
137    }
138
139    /// Build the final AstConfig
140    pub fn build(self) -> AstConfig {
141        self.config
142    }
143}
144
145/// Operator associativity for Pratt parsing
146#[derive(Debug, Clone, Copy, PartialEq, Eq)]
147pub enum Assoc {
148    Left,
149    Right,
150}
151
152/// Main grammar builder
153#[derive(Debug, Default)]
154pub struct Grammar {
155    pub rules: Vec<RuleDef>,
156    pub ast_config: AstConfig,
157}
158
159impl Grammar {
160    pub fn new() -> Self {
161        Self::default()
162    }
163
164    /// Configure AST integration settings
165    pub fn ast_config<F>(mut self, f: F) -> Self
166    where
167        F: FnOnce(AstConfigBuilder) -> AstConfigBuilder,
168    {
169        let builder = AstConfigBuilder::new();
170        let builder = f(builder);
171        self.ast_config = builder.build();
172        self
173    }
174
175    /// Define a parser rule
176    pub fn rule<F>(mut self, name: &str, f: F) -> Self
177    where
178        F: FnOnce(&RuleBuilder) -> Combinator,
179    {
180        let builder = RuleBuilder::new(name);
181        let combinator = f(&builder);
182        self.rules.push(RuleDef {
183            name: name.to_string(),
184            combinator,
185        });
186        self
187    }
188
189    /// Finalize and validate the grammar
190    pub fn build(self) -> CompiledGrammar {
191        let errors = validation::validate_grammar(&self.rules);
192        for error in &errors {
193            eprintln!("Grammar warning: {}", error);
194        }
195
196        CompiledGrammar {
197            rules: self.rules,
198            ast_config: self.ast_config,
199        }
200    }
201
202    /// Build with automatic backtracking optimization
203    ///
204    /// This is equivalent to calling `.build().optimize_backtracking()`.
205    /// It detects Choice nodes with shared prefixes containing recursive rules
206    /// and rewrites them to factor out the common prefix.
207    pub fn build_optimized(self) -> CompiledGrammar {
208        self.build().optimize_backtracking()
209    }
210
211    /// Build with automatic memoization to avoid exponential backtracking.
212    ///
213    /// This analyzes the grammar to identify rules that would benefit from
214    /// memoization and automatically wraps them. Use this when you have
215    /// patterns that cause exponential backtracking (like TypeScript's
216    /// `identifier<types>(args)` vs comparison operators).
217    ///
218    /// The process:
219    /// 1. Analyze the grammar to find Choice nodes with shared recursive prefixes
220    /// 2. Identify rule references in those prefixes
221    /// 3. Wrap those rules with memoization
222    /// 4. Build the grammar
223    pub fn build_with_memoization(mut self) -> CompiledGrammar {
224        let candidates = prefix_factoring::identify_memoization_candidates(&self.rules);
225
226        if !candidates.is_empty() {
227            // Assign unique memo IDs to each candidate
228            let mut memo_id = 0;
229
230            // Wrap candidate rules with memoization
231            for rule in &mut self.rules {
232                if candidates.contains(&rule.name) {
233                    rule.combinator = Combinator::Memoize {
234                        id: memo_id,
235                        inner: Box::new(rule.combinator.clone()),
236                    };
237                    memo_id += 1;
238                }
239            }
240        }
241
242        self.build()
243    }
244
245    /// Build with both prefix factoring optimization and automatic memoization.
246    ///
247    /// This combines the benefits of both optimization strategies:
248    /// 1. Prefix factoring rewrites Choice nodes to factor out common prefixes
249    /// 2. Memoization caches results for rules that cause exponential backtracking
250    ///
251    /// Use this for the most comprehensive backtracking prevention.
252    pub fn build_optimized_with_memoization(mut self) -> CompiledGrammar {
253        let candidates = prefix_factoring::identify_memoization_candidates(&self.rules);
254
255        if !candidates.is_empty() {
256            let mut memo_id = 0;
257            for rule in &mut self.rules {
258                if candidates.contains(&rule.name) {
259                    rule.combinator = Combinator::Memoize {
260                        id: memo_id,
261                        inner: Box::new(rule.combinator.clone()),
262                    };
263                    memo_id += 1;
264                }
265            }
266        }
267
268        self.build().optimize_backtracking()
269    }
270}
271
272/// Compiled grammar ready for code generation
273#[derive(Debug)]
274pub struct CompiledGrammar {
275    pub rules: Vec<RuleDef>,
276    pub ast_config: AstConfig,
277}
278
279impl CompiledGrammar {
280    /// Generate Rust source code for the parser
281    pub fn generate(&self) -> String {
282        CodeGenerator::new(self).generate()
283    }
284
285    /// Analyze the grammar for backtracking issues.
286    ///
287    /// Returns warnings for each Choice node that has alternatives sharing
288    /// a common prefix containing recursive rules (causing O(2^n) parsing).
289    pub fn analyze_backtracking(&self) -> Vec<BacktrackingWarning> {
290        prefix_factoring::analyze_grammar(&self.rules)
291    }
292
293    /// Optimize the grammar by factoring out common prefixes in Choice nodes.
294    ///
295    /// This transforms patterns like:
296    /// ```text
297    /// Choice([Seq([A, B, C]), Seq([A, B, D])])
298    /// ```
299    /// Into:
300    /// ```text
301    /// Seq([A, B, Choice([C, D])])
302    /// ```
303    ///
304    /// Only transforms choices with `Exponential` severity (shared prefix
305    /// containing recursive rules).
306    pub fn optimize_backtracking(self) -> Self {
307        use std::collections::HashMap;
308
309        // Build rule map from references before consuming self
310        let rule_map: HashMap<&str, &Combinator> = self
311            .rules
312            .iter()
313            .map(|r| (r.name.as_str(), &r.combinator))
314            .collect();
315
316        // Optimize each rule's combinator
317        let optimized_rules: Vec<RuleDef> = self
318            .rules
319            .iter()
320            .map(|rule| RuleDef {
321                name: rule.name.clone(),
322                combinator: prefix_factoring::optimize_combinator(&rule.combinator, &rule_map),
323            })
324            .collect();
325
326        CompiledGrammar {
327            rules: optimized_rules,
328            ast_config: self.ast_config,
329        }
330    }
331}
332
333#[cfg(test)]
334mod tests {
335    use super::*;
336
337    #[test]
338    fn test_basic_grammar() {
339        let grammar = Grammar::new()
340            .rule("number", |r| r.capture(r.one_or_more(r.digit())))
341            .rule("expr", |r| {
342                r.sequence((r.parse("number"), r.lit("+"), r.parse("number")))
343            })
344            .build();
345
346        assert_eq!(grammar.rules.len(), 2);
347        assert_eq!(grammar.rules[0].name, "number");
348        assert_eq!(grammar.rules[1].name, "expr");
349    }
350}