syntax_parser_generator/parsing/translator/build.rs
1use std::collections::HashMap;
2
3use crate::handles::{Handle, Handled};
4use crate::handles::collections::{HandledVec, HandleMap};
5use crate::handles::specials::AutomaticallyHandled;
6use crate::parsing::lr_parser::build::LrParserBuilder;
7use crate::parsing::lr_parser::rules::{Associativity, Binding, GrammarSymbol};
8use crate::parsing::translator::handlers::{LeafSatelliteBuilder, SatelliteReducer};
9use crate::parsing::translator::sdt::SyntaxDirectedTranslator;
10
11/// An interface for specifying and compiling a [SyntaxDirectedTranslator].
12///
13/// An instance of this type serve as a "contractor" for building a LALR parser. It exports an API
14/// to communicate the specifications of the parser, and when you're done - its
15/// [build](SyntaxDirectedTranslatorBuilder::build) can be used to compile a matching instance of
16/// [SyntaxDirectedTranslator].
17///
18/// # Features
19///
20/// ## Bindings
21///
22/// A group of terminal symbols (i.e. lexeme types) can be associated with a _binding_, which can
23/// then be attached to the grammar's production rules. Bindings are used to resolve shift-reduce
24/// conflicts: when deciding between shifting by a bound terminal or reducing by a bound rule,
25/// the one with higher priority binding is selected. If both have the same binding - we reduce if
26/// it is left-associated, and shift if it is right-associated.
27///
28/// This can be used to implement associativity and precedence of operators. For example - one
29/// left-associated higher-priority binding for multiplication and division, and another
30/// left-associated lower-priority binding for addition and subtraction.
31///
32/// ## Dubs
33///
34/// To make the parser's programmatic specification cleaner, the notion of _dubs_ was introduced:
35/// instead of referring to a terminal, nonterminal, or a binding, by some complex Rust object,
36/// these are referred to by their "dubs" - character strings that identify them.
37///
38/// ## Translation Scheme Specifications
39///
40/// The compiled parser will be capable of translating a given sequence of lexemes into some
41/// higher-level representation (AST, IR sequence, etc.). We refer to this higher level
42/// representation as _satellite data_ (as it escorts subtrees of the input's syntax tree). Client
43/// code is free to determine this target representation and the translation logic.
44///
45/// This translation logic is carried out recursively: a subtree of the input syntax tree is
46/// translated into satellite data as a function of the translations of its own subtrees. We call
47/// such translation functions _satellite reducers_, as they execute after each reduction of
48/// grammar symbols by the underlying LR parser. At the leaves, lexemes are directly translated to
49/// satellite data using dedicated functions we call _leaf satellite builders_.
50///
51/// All of these translation functions receive a mutable reference to the _translation context_ as
52/// an additional argument, which is a user-defined object that's used to hold global knowledge
53/// about the translated input, such as symbol tables and general statistics.
54///
55/// All of this translation logic is of course user-defined, providing client code with great
56/// flexibility when building custom syntax-directed translators.
57///
58/// # Example
59///
60/// Check out the example at [parsing](crate::parsing).
61///
62/// # Conflict Resolution Policy
63///
64/// * Reduce-reduce conflicts cannot be resolved.
65/// * Shift-reduce conflicts are resolved with "shift" by default, unless otherwise specified by
66/// bindings.
67pub struct SyntaxDirectedTranslatorBuilder<LexemeType, Context, Satellite>
68where
69 LexemeType: AutomaticallyHandled,
70{
71 grammar_symbol_dub_map: HashMap<String, GrammarSymbol<LexemeType, Nonterminal>>,
72 nonterminals: HandledVec<Nonterminal>,
73 lr_parser_builder:
74 LrParserBuilder<LexemeType, Nonterminal, SatelliteReducer<Context, Satellite>>,
75 bindings_dub_map: HashMap<String, Handle<Binding<LexemeType>>>,
76 leaf_satellite_builder_map: HandleMap<LexemeType, LeafSatelliteBuilder<Context, Satellite>>,
77 default_leaf_satellite_builder: Option<LeafSatelliteBuilder<Context, Satellite>>,
78 satellite_reducers: HandledVec<SatelliteReducer<Context, Satellite>>,
79}
80
81impl<LexemeType, Context, Satellite> SyntaxDirectedTranslatorBuilder<LexemeType, Context, Satellite>
82where
83 LexemeType: AutomaticallyHandled,
84{
85 /// Create a blank [SyntaxDirectedTranslatorBuilder], with no registered specifications.
86 pub fn new() -> Self {
87 Self {
88 grammar_symbol_dub_map: HashMap::new(),
89 nonterminals: HandledVec::new(),
90 lr_parser_builder: LrParserBuilder::new(),
91 bindings_dub_map: HashMap::new(),
92 leaf_satellite_builder_map: HandleMap::new(),
93 default_leaf_satellite_builder: None,
94 satellite_reducers: HandledVec::new(),
95 }
96 }
97
98 /// Dub with a category of lexemes.
99 ///
100 /// Remember: lexeme types serve as terminal symbols for the underlying LALR parser. This
101 /// function, in effect, dubs a terminal symbol in the grammar.
102 ///
103 /// # Panics
104 ///
105 /// If some other grammar symbol is already identified by this dub.
106 pub fn dub_lexeme_type(&mut self, lexeme_type: LexemeType, dub: &str) {
107 if self.grammar_symbol_dub_map.contains_key(dub) {
108 panic!(
109 "Tried to dub a lexeme type {:?}, which is already used to dub another \
110 grammar symbol",
111 dub,
112 )
113 }
114 self.grammar_symbol_dub_map.insert(
115 String::from(dub),
116 GrammarSymbol::Terminal(lexeme_type.handle()),
117 );
118 }
119
120 /// Dub multiple categories of lexemes at once.
121 ///
122 /// # Panics
123 ///
124 /// If a new dub is already used to dub another grammar symbol.
125 pub fn dub_lexeme_types<'a>(
126 &mut self,
127 lexeme_type_dubs: impl Iterator<Item=(LexemeType, &'a str)>,
128 ) {
129 for (lexeme_type, dub) in lexeme_type_dubs {
130 self.dub_lexeme_type(lexeme_type, dub);
131 }
132 }
133
134 /// Create a new nonterminal grammar symbol, associated with the given dub.
135 ///
136 /// # Panics
137 ///
138 /// If some other grammar symbol is already identified by this dub.
139 pub fn new_nonterminal(&mut self, dub: &str) {
140 if self.grammar_symbol_dub_map.contains_key(dub) {
141 panic!(
142 "Tried to create a new nonterminal dubbed {:?}, which is already used to dub \
143 another grammar symbol",
144 dub,
145 )
146 }
147 let nonterminal = self.nonterminals.insert(Nonterminal);
148 self.grammar_symbol_dub_map
149 .insert(String::from(dub), GrammarSymbol::Nonterminal(nonterminal));
150 }
151
152 /// Create multiple nonterminals at once, associated with the given dubs.
153 ///
154 /// # Panics
155 ///
156 /// If a new dub is already used to dub another grammar symbol.
157 pub fn new_nonterminals<'a>(&mut self, dubs: impl Iterator<Item=&'a str>) {
158 for dub in dubs {
159 self.new_nonterminal(dub);
160 }
161 }
162
163 /// Set the start nonterminal of the underlying grammar.
164 ///
165 /// The nonterminal is identified by its `dub`.
166 ///
167 /// # Panics
168 ///
169 /// If no nonterminal is associated with this dub.
170 pub fn set_start_nonterminal(&mut self, dub: &str) {
171 let nonterminal = match self.grammar_symbol_dub_map.get(dub) {
172 Some(GrammarSymbol::Nonterminal(nonterminal)) => *nonterminal,
173 _ => panic!(
174 "Tried to set start nonterminal to a non-existing nonterminal dubbed {:?}",
175 dub,
176 ),
177 };
178 self.lr_parser_builder.set_start_nonterminal(nonterminal);
179 }
180
181 /// Register a new binding.
182 ///
183 /// Note that the order of registration determines the priority of the bindings: bindings
184 /// registered earlier have higher priority over bindings registered later.
185 ///
186 /// # Arguments
187 /// * `bound_lexeme_types_dubs` - the dubs of the terminal grammar symbols (lexeme categories)
188 /// that are bound to this binding.
189 /// * `associativity` - the binding's associativity.
190 /// * `binding_dub` - a dub, used to identify this binding.
191 ///
192 /// # Panics
193 ///
194 /// If the binding's dub is already used to dub another binding, or if no terminal symbol is
195 /// dubbed with one of the `bound_lexeme_types_dubs`.
196 pub fn new_binding(
197 &mut self,
198 bound_lexeme_types_dubs: Vec<&str>,
199 associativity: Associativity,
200 binding_dub: &str,
201 ) {
202 if self.bindings_dub_map.contains_key(binding_dub) {
203 panic!(
204 "Tried to create a new binding dubbed {:?}, which is already used to dub an \
205 existing binding",
206 binding_dub,
207 )
208 }
209 let terminals = bound_lexeme_types_dubs
210 .iter()
211 .map(
212 |&lexeme_type_dub| match self.grammar_symbol_dub_map.get(lexeme_type_dub) {
213 Some(GrammarSymbol::Terminal(terminal)) => *terminal,
214 _ => panic!(
215 "Tried to create a binding dubbed {:?} , bound to a non-existing \
216 lexeme type dubbed {:?}",
217 binding_dub, lexeme_type_dub,
218 ),
219 },
220 )
221 .collect();
222 self.bindings_dub_map.insert(
223 String::from(binding_dub),
224 self.lr_parser_builder
225 .register_binding(terminals, associativity),
226 );
227 }
228
229 /// Set the function that'll transform lexemes of a certain type into satellite data.
230 ///
231 /// When a lexeme of the category dubbed `lexeme_type_dub` will be encountered, `builder` will
232 /// be called with the lexeme's contents (alongside a reference to the translation context), and
233 /// the output will serve as the satellite data attached to this "terminal symbol".
234 ///
235 /// # Panics
236 ///
237 /// If `lexeme_type_dub` isn't a known lexeme-type dub.
238 pub fn set_leaf_satellite_builder<F>(&mut self, lexeme_type_dub: &str, builder: F)
239 where
240 F: Fn(&mut Context, String) -> Satellite + 'static,
241 {
242 let lexeme_type = match self.grammar_symbol_dub_map.get(lexeme_type_dub) {
243 Some(GrammarSymbol::Terminal(lexeme_type)) => *lexeme_type,
244 _ => panic!(
245 "Tried to set a leaf satellite builder for a non-existing lexeme type dubbed \
246 {:?}",
247 lexeme_type_dub,
248 ),
249 };
250 self.leaf_satellite_builder_map
251 .insert(lexeme_type, Box::new(builder));
252 }
253
254 /// Set a leaf-satellite builder to process lexeme types for which no builder was set with
255 /// [SyntaxDirectedTranslatorBuilder::set_leaf_satellite_builder].
256 pub fn set_default_leaf_satellite_builder<F>(&mut self, builder: F)
257 where
258 F: Fn(&mut Context, String) -> Satellite + 'static,
259 {
260 self.default_leaf_satellite_builder = Some(Box::new(builder));
261 }
262
263 /// Add a production rule to the underlying context-free grammar.
264 ///
265 /// # Arguments
266 ///
267 /// * `lhs` - the dub of the nonterminal in the left-hand side of the production.
268 /// * `rhs` - a sequence of grammar-symbols dubs (terminals and nonterminals), that appear in
269 /// the right-hand side of the production.
270 /// * `satellite_reducer` - when reducing by the new rule, this function will be called with the
271 /// satellite data of the right-hand side grammar symbols (and the translation context), and
272 /// its output will serve as the satellite data attached to the left-hand side nonterminal.
273 ///
274 /// # Panics
275 ///
276 /// If any the given dubs isn't known as a matching dub.
277 pub fn register_rule<F>(&mut self, lhs: &str, rhs: Vec<&str>, satellite_reducer: F)
278 where
279 F: Fn(&mut Context, Vec<Satellite>) -> Satellite + 'static,
280 {
281 self.register_rule_raw(lhs, rhs, None, satellite_reducer);
282 }
283
284 /// Register a production rule with an associated binding.
285 ///
286 /// The associated binding is identified by its dub - `binding_dub`. The rest of the arguments
287 /// are identical to [SyntaxDirectedTranslatorBuilder::register_rule].
288 ///
289 /// # Panics
290 ///
291 /// If any the given dubs isn't known as a matching dub.
292 pub fn register_bound_rule<F>(
293 &mut self,
294 lhs: &str,
295 rhs: Vec<&str>,
296 binding_dub: &str,
297 satellite_reducer: F,
298 ) where
299 F: Fn(&mut Context, Vec<Satellite>) -> Satellite + 'static,
300 {
301 self.register_rule_raw(lhs, rhs, Some(binding_dub), satellite_reducer);
302 }
303
304 /// Register a production rule for which the satellite reducer copies the satellite data from
305 /// its 1-length RHS to its LHS.
306 ///
307 /// This is commonly used for productions, such as `expression -> integer_literal`.
308 ///
309 /// # Arguments
310 ///
311 /// * `lhs` - the dub of the nonterminal in the left-hand side of the production.
312 /// * `rhs` - the dub of the only grammar-symbol in the right-hand side of the production.
313 ///
314 /// # Panics
315 ///
316 /// If any the given dubs isn't known as a matching dub.
317 pub fn register_identity_rule(&mut self, lhs: &str, rhs: &str) {
318 self.register_rule(lhs, vec![rhs], |_context, mut satellites| {
319 satellites.pop().expect(
320 "Tried to reduce satellites using `identity_satellite_reducer`, but the \
321 provided RHS satellite list is empty (it should contain a single satellite for the \
322 single item in the RHS)",
323 )
324 });
325 }
326
327 /// Register a production rule whose RHS is empty.
328 ///
329 /// This is commonly used with nonterminals that represent lists that can be empty. For
330 /// example `statements_list -> _nothing_`.
331 ///
332 /// # Arguments
333 ///
334 /// * `lhs` - the dub of the nonterminal in the left-hand side of the production.
335 /// * `default_satellite_builder` - when reducing by this rule, this will be called (with the
336 /// translation context), and its output will serve as the satellite data attached to the
337 /// left-hand side nonterminal.
338 ///
339 /// # Panics
340 ///
341 /// If `lhs` is not a known dub for a nonterminal.
342 pub fn register_empty_rule<F>(&mut self, lhs: &str, default_satellite_builder: F)
343 where
344 F: Fn(&mut Context) -> Satellite + 'static,
345 {
346 self.register_rule(lhs, vec![], move |context, _satellites| {
347 default_satellite_builder(context)
348 })
349 }
350
351 fn register_rule_raw<F>(
352 &mut self,
353 lhs_dub: &str,
354 rhs_dubs: Vec<&str>,
355 binding_dub: Option<&str>,
356 satellite_reducer: F,
357 ) where
358 F: Fn(&mut Context, Vec<Satellite>) -> Satellite + 'static,
359 {
360 let lhs = match self.grammar_symbol_dub_map.get(lhs_dub) {
361 Some(GrammarSymbol::Nonterminal(nonterminal)) => *nonterminal,
362 _ => panic!(
363 "Tried to register a production rule whose LHS is a non-existing nonterminal \
364 dubbed {:?}",
365 lhs_dub,
366 ),
367 };
368 let rhs = rhs_dubs
369 .iter()
370 .map(|&dub| match self.grammar_symbol_dub_map.get(dub) {
371 Some(&grammar_symbol) => grammar_symbol,
372 _ => panic!(
373 "Tried to register a production rule whose RHS contains non-existing grammar \
374 symbol dubbed {:?}",
375 dub,
376 ),
377 })
378 .collect();
379 let binding = binding_dub.map(|actual_binding_dub| {
380 match self.bindings_dub_map.get(actual_binding_dub) {
381 Some(&binding) => binding,
382 None => panic!(
383 "Tried to register a production rule bound to a non-existing binding dubbed \
384 {:?}",
385 actual_binding_dub,
386 ),
387 }
388 });
389 let tag = self.satellite_reducers.insert(Box::new(satellite_reducer));
390 self.lr_parser_builder.register_rule(lhs, rhs, binding, tag);
391 }
392
393 /// Compile the set specifications into a functioning [SyntaxDirectedTranslator].
394 pub fn build(self) -> SyntaxDirectedTranslator<LexemeType, Context, Satellite> {
395 SyntaxDirectedTranslator {
396 lr_parser: self.lr_parser_builder.build(),
397 default_leaf_satellite_builder: self.default_leaf_satellite_builder,
398 leaf_satellite_builder_map: self.leaf_satellite_builder_map,
399 satellite_reducers: self.satellite_reducers,
400 }
401 }
402}
403
404// Blank, don't really need to carry any info, Handle API is only used for counting registrations
405pub struct Nonterminal;
406impl Handled for Nonterminal {
407 type HandleCoreType = u8;
408}