Skip to main content

trampoline_parser/
lib.rs

1//! Trampoline Parser Generator
2//!
3//! A DSL for generating fully trampoline-based scannerless parsers.
4//!
5//! # Example
6//!
7//! ```rust
8//! use trampoline_parser::Grammar;
9//!
10//! let grammar = Grammar::new()
11//!     .rule("number", |r| {
12//!         r.capture(r.one_or_more(r.digit()))
13//!     })
14//!     .rule("expr", |r| {
15//!         r.sequence((
16//!             r.parse("number"),
17//!             r.lit("+"),
18//!             r.parse("number"),
19//!         ))
20//!     })
21//!     .build();
22//!
23//! let code = grammar.generate();
24//! ```
25
26mod codegen;
27pub mod grammars;
28mod ir;
29mod parser_dsl;
30pub mod prefix_factoring;
31mod validation;
32
33pub use codegen::*;
34pub use ir::*;
35pub use parser_dsl::*;
36pub use prefix_factoring::{
37    BacktrackingSeverity, BacktrackingWarning, identify_memoization_candidates,
38};
39pub use proc_macro2::TokenStream;
40pub use validation::{ValidationError, validate_grammar};
41
42/// Builder for AST configuration
43#[derive(Debug, Default)]
44pub struct AstConfigBuilder {
45    config: AstConfig,
46}
47
48impl AstConfigBuilder {
49    pub fn new() -> Self {
50        Self::default()
51    }
52
53    /// Add an import statement (e.g., "crate::ast::*")
54    pub fn import(mut self, import_path: &str) -> Self {
55        self.config.imports.push(import_path.to_string());
56        self
57    }
58
59    /// Set the return type of the parse() function
60    pub fn result_type(mut self, result_type: &str) -> Self {
61        self.config.result_type = Some(result_type.to_string());
62        self
63    }
64
65    /// Set the external span type (disables internal Span generation)
66    pub fn span_type(mut self, span_type: &str) -> Self {
67        self.config.span_type = Some(span_type.to_string());
68        self.config.generate_span = false;
69        self
70    }
71
72    /// Set the external string type (e.g., "JsString")
73    pub fn string_type(mut self, string_type: &str) -> Self {
74        self.config.string_type = Some(string_type.to_string());
75        self
76    }
77
78    /// Set the external error type (disables internal ParseError generation)
79    pub fn error_type(mut self, error_type: &str) -> Self {
80        self.config.error_type = Some(error_type.to_string());
81        self.config.generate_parse_error = false;
82        self
83    }
84
85    /// Disable ParseResult enum generation
86    pub fn no_parse_result(mut self) -> Self {
87        self.config.generate_parse_result = false;
88        self
89    }
90
91    /// Enable AST mapping application
92    pub fn apply_mappings(mut self) -> Self {
93        self.config.apply_mappings = true;
94        self
95    }
96
97    /// Enable string dictionary integration for string interning
98    pub fn string_dict(mut self, dict_type: &str) -> Self {
99        self.config.string_dict_type = Some(dict_type.to_string());
100        self
101    }
102
103    /// Set custom method name for string interning (default: "get_or_insert")
104    pub fn string_dict_method(mut self, method: &str) -> Self {
105        self.config.string_dict_method = Some(method.to_string());
106        self
107    }
108
109    /// Add helper code (functions, constants) to be included in the generated parser
110    pub fn helper(mut self, code: &str) -> Self {
111        self.config.helper_code.push(code.to_string());
112        self
113    }
114
115    /// Add a custom ParseResult variant for typed AST nodes
116    pub fn result_variant(mut self, name: &str, rust_type: &str) -> Self {
117        self.config.result_variants.push(ir::ResultVariant {
118            name: name.to_string(),
119            rust_type: rust_type.to_string(),
120            span_expr: None,
121        });
122        self
123    }
124
125    /// Add a custom ParseResult variant with custom span extraction
126    pub fn result_variant_with_span(
127        mut self,
128        name: &str,
129        rust_type: &str,
130        span_expr: &str,
131    ) -> Self {
132        self.config.result_variants.push(ir::ResultVariant {
133            name: name.to_string(),
134            rust_type: rust_type.to_string(),
135            span_expr: Some(span_expr.to_string()),
136        });
137        self
138    }
139
140    /// Build the final AstConfig
141    pub fn build(self) -> AstConfig {
142        self.config
143    }
144}
145
146/// Operator associativity for Pratt parsing
147#[derive(Debug, Clone, Copy, PartialEq, Eq)]
148pub enum Assoc {
149    Left,
150    Right,
151}
152
153/// Main grammar builder
154#[derive(Debug, Default)]
155pub struct Grammar {
156    pub rules: Vec<RuleDef>,
157    pub ast_config: AstConfig,
158}
159
160impl Grammar {
161    pub fn new() -> Self {
162        Self::default()
163    }
164
165    /// Configure AST integration settings
166    pub fn ast_config<F>(mut self, f: F) -> Self
167    where
168        F: FnOnce(AstConfigBuilder) -> AstConfigBuilder,
169    {
170        let builder = AstConfigBuilder::new();
171        let builder = f(builder);
172        self.ast_config = builder.build();
173        self
174    }
175
176    /// Define a parser rule
177    pub fn rule<F>(mut self, name: &str, f: F) -> Self
178    where
179        F: FnOnce(&RuleBuilder) -> Combinator,
180    {
181        let builder = RuleBuilder::new(name);
182        let combinator = f(&builder);
183        self.rules.push(RuleDef {
184            name: name.to_string(),
185            combinator,
186        });
187        self
188    }
189
190    /// Finalize and validate the grammar
191    pub fn build(self) -> CompiledGrammar {
192        let errors = validation::validate_grammar(&self.rules);
193        for error in &errors {
194            eprintln!("Grammar warning: {}", error);
195        }
196
197        CompiledGrammar {
198            rules: self.rules,
199            ast_config: self.ast_config,
200        }
201    }
202
203    /// Build with automatic backtracking optimization
204    ///
205    /// This is equivalent to calling `.build().optimize_backtracking()`.
206    /// It detects Choice nodes with shared prefixes containing recursive rules
207    /// and rewrites them to factor out the common prefix.
208    pub fn build_optimized(self) -> CompiledGrammar {
209        self.build().optimize_backtracking()
210    }
211
212    /// Build with automatic memoization to avoid exponential backtracking.
213    ///
214    /// This analyzes the grammar to identify rules that would benefit from
215    /// memoization and automatically wraps them. Use this when you have
216    /// patterns that cause exponential backtracking (like TypeScript's
217    /// `identifier<types>(args)` vs comparison operators).
218    ///
219    /// The process:
220    /// 1. Analyze the grammar to find Choice nodes with shared recursive prefixes
221    /// 2. Identify rule references in those prefixes
222    /// 3. Wrap those rules with memoization
223    /// 4. Build the grammar
224    pub fn build_with_memoization(mut self) -> CompiledGrammar {
225        let candidates = prefix_factoring::identify_memoization_candidates(&self.rules);
226
227        if !candidates.is_empty() {
228            // Assign unique memo IDs to each candidate
229            let mut memo_id = 0;
230
231            // Wrap candidate rules with memoization
232            for rule in &mut self.rules {
233                if candidates.contains(&rule.name) {
234                    rule.combinator = Combinator::Memoize {
235                        id: memo_id,
236                        inner: Box::new(rule.combinator.clone()),
237                    };
238                    memo_id += 1;
239                }
240            }
241        }
242
243        self.build()
244    }
245
246    /// Build with both prefix factoring optimization and automatic memoization.
247    ///
248    /// This combines the benefits of both optimization strategies:
249    /// 1. Prefix factoring rewrites Choice nodes to factor out common prefixes
250    /// 2. Memoization caches results for rules that cause exponential backtracking
251    ///
252    /// Use this for the most comprehensive backtracking prevention.
253    pub fn build_optimized_with_memoization(mut self) -> CompiledGrammar {
254        let candidates = prefix_factoring::identify_memoization_candidates(&self.rules);
255
256        if !candidates.is_empty() {
257            let mut memo_id = 0;
258            for rule in &mut self.rules {
259                if candidates.contains(&rule.name) {
260                    rule.combinator = Combinator::Memoize {
261                        id: memo_id,
262                        inner: Box::new(rule.combinator.clone()),
263                    };
264                    memo_id += 1;
265                }
266            }
267        }
268
269        self.build().optimize_backtracking()
270    }
271}
272
273/// Compiled grammar ready for code generation
274#[derive(Debug)]
275pub struct CompiledGrammar {
276    pub rules: Vec<RuleDef>,
277    pub ast_config: AstConfig,
278}
279
280impl CompiledGrammar {
281    /// Generate Rust source code for the parser
282    pub fn generate(&self) -> String {
283        CodeGenerator::new(self).generate()
284    }
285
286    /// Analyze the grammar for backtracking issues.
287    ///
288    /// Returns warnings for each Choice node that has alternatives sharing
289    /// a common prefix containing recursive rules (causing O(2^n) parsing).
290    pub fn analyze_backtracking(&self) -> Vec<BacktrackingWarning> {
291        prefix_factoring::analyze_grammar(&self.rules)
292    }
293
294    /// Optimize the grammar by factoring out common prefixes in Choice nodes.
295    ///
296    /// This transforms patterns like:
297    /// ```text
298    /// Choice([Seq([A, B, C]), Seq([A, B, D])])
299    /// ```
300    /// Into:
301    /// ```text
302    /// Seq([A, B, Choice([C, D])])
303    /// ```
304    ///
305    /// Only transforms choices with `Exponential` severity (shared prefix
306    /// containing recursive rules).
307    pub fn optimize_backtracking(self) -> Self {
308        use std::collections::HashMap;
309
310        // Build rule map from references before consuming self
311        let rule_map: HashMap<&str, &Combinator> = self
312            .rules
313            .iter()
314            .map(|r| (r.name.as_str(), &r.combinator))
315            .collect();
316
317        // Optimize each rule's combinator
318        let optimized_rules: Vec<RuleDef> = self
319            .rules
320            .iter()
321            .map(|rule| RuleDef {
322                name: rule.name.clone(),
323                combinator: prefix_factoring::optimize_combinator(&rule.combinator, &rule_map),
324            })
325            .collect();
326
327        CompiledGrammar {
328            rules: optimized_rules,
329            ast_config: self.ast_config,
330        }
331    }
332}
333
334#[cfg(test)]
335mod tests {
336    use super::*;
337
338    #[test]
339    fn test_basic_grammar() {
340        let grammar = Grammar::new()
341            .rule("number", |r| r.capture(r.one_or_more(r.digit())))
342            .rule("expr", |r| {
343                r.sequence((r.parse("number"), r.lit("+"), r.parse("number")))
344            })
345            .build();
346
347        assert_eq!(grammar.rules.len(), 2);
348        assert_eq!(grammar.rules[0].name, "number");
349        assert_eq!(grammar.rules[1].name, "expr");
350    }
351}