syntax_parser_generator/parsing/
mod.rs

1//! Build and manage syntax-directed translators based on LALR parsers.
2//!
3//! The second step of the syntax-parsing pipeline is called _syntax directed translation (SDT)_,
4//! or _parsing_. This phase is responsible for reconstructing the hierarchical structure of the
5//! lexemes collected in the lexical analysis phase (also known as the input's _syntax tree_),
6//! according to a set of rules specified by a _context free grammar (CFG)_.
7//!
8//! # LALR Parsing
9//!
10//! This module provides a convenient interface for building and executing such parsers on streams
11//! of lexemes, based on the bottom-up [LALR](https://en.wikipedia.org/wiki/LALR_parser) parsing
12//! algorithm. Lexeme types (e.g. keyword, identifier, integer literal) serve as the parser's
13//! "terminal symbols". The contents of the lexemes are only used for translation (see the next
14//! section), and are ignored by the LALR algorithm.
15//!
16//! # Translation
17//!
18//! As it is being reconstructed, the syntax tree is translated bottom-up into a more meaningful
19//! representation of the input, which we call _satellite data_ (as it escorts subtrees of the
20//! syntax tree). Client code is free to define the translation scheme and the target representation
21//! (the type of satellite data), which is usually an abstract syntax tree (AST), or an intermediate
22//! code representation (IR).
23//!
24//! To aid the translation process, a mutable _translation context_ can be managed by the parser.
25//! This is a user-defined object associated with each execution of the parser, which is
26//! consistently passed to client code responsible for translation of the input syntax tree. This
27//! can be used to manage global knowledge about the parsed input, such as symbol tables or general
28//! statistics.
29//!
30//! # See Also
31//!
32//! See [`SyntaxDirectedTranslatorBuilder`] for more details on the specifications of such
33//! translation engines.
34//!
35//! # Example
36//!
37//! ```rust
38//! # use syntax_parser_generator::handles::specials::AutomaticallyHandled;
39//! # use syntax_parser_generator::lex::Lexeme;
40//! # use syntax_parser_generator::parsing::{Associativity, SyntaxDirectedTranslator, SyntaxDirectedTranslatorBuilder};
41//! # #[derive(Debug, Clone, Copy)]
42//! enum LexemeType { Integer, Plus, Star }
43//! # impl AutomaticallyHandled for LexemeType {
44//! #    type HandleCoreType = u8;
45//! #    fn serial(&self) -> usize { *self as usize }
46//! # }
47//! #
48//!
49//! struct Context {
50//!     integer_count: usize,
51//!     op_count: usize,
52//! }
53//!
54//! impl Context {
55//!     fn new() -> Self {
56//!         Self {
57//!             integer_count: 0,
58//!             op_count: 0,
59//!         }
60//!     }
61//!     fn integer(&mut self, lexeme: String) -> Option<i32> {
62//!         self.integer_count += 1;
63//!         Some(lexeme.parse().ok()?)
64//!     }
65//!     fn sum(&mut self, mut satellites: Vec<Option<i32>>) -> Option<i32> {
66//!         self.op_count += 1;
67//!         Some(satellites[0]? + satellites[2]?)
68//!     }
69//!
70//!     fn mult(&mut self, mut satellites: Vec<Option<i32>>) -> Option<i32> {
71//!         self.op_count += 1;
72//!         Some(satellites[0]? * satellites[2]?)
73//!     }
74//! }
75//!
76//! fn build_calculator() -> SyntaxDirectedTranslator<LexemeType, Context, Option<i32>> {
77//!     let mut builder = SyntaxDirectedTranslatorBuilder::new();
78//!
79//!     builder.dub_lexeme_types(vec![
80//!         (LexemeType::Integer, "INTEGER"),
81//!         (LexemeType::Plus, "+"),
82//!         (LexemeType::Star, "*"),
83//!     ].into_iter());
84//!     builder.new_nonterminal("expression");
85//!     builder.set_start_nonterminal("expression");
86//!
87//!     builder.set_leaf_satellite_builder("INTEGER", Context::integer);
88//!     builder.set_default_leaf_satellite_builder(|_, _| None);
89//!
90//!     builder.new_binding(
91//!         vec!["*"],
92//!         Associativity::Left,
93//!         "multiplicative"
94//!     );
95//!     builder.new_binding(
96//!         vec!["+"],
97//!         Associativity::Left,
98//!         "additive",
99//!     );
100//!
101//!     builder.register_identity_rule("expression", "INTEGER");
102//!     builder.register_bound_rule(
103//!         "expression",
104//!         vec!["expression", "+", "expression"],
105//!         "additive",
106//!         Context::sum,
107//!     );
108//!     builder.register_bound_rule(
109//!         "expression",
110//!         vec!["expression", "*", "expression"],
111//!         "multiplicative",
112//!         Context::mult,
113//!     );
114//!     builder.build()
115//! }
116//!
117//! fn main () {
118//!     let parser = build_calculator();
119//!     let mut context = Context::new();
120//!     let lexemes = vec![
121//!         Lexeme::new(LexemeType::Integer, "-2"),
122//!         Lexeme::new(LexemeType::Plus, "+"),
123//!         Lexeme::new(LexemeType::Integer, "4"),
124//!         Lexeme::new(LexemeType::Star, "*"),
125//!         Lexeme::new(LexemeType::Integer, "3"),
126//!     ];
127//!     assert_eq!(parser.translate(&mut context, lexemes.into_iter()), Some(Some(10)));
128//!     assert_eq!(context.integer_count, 3);
129//!     assert_eq!(context.op_count, 2);
130//! }
131//! ```
132
133pub use lr_parser::rules::Associativity;
134pub use translator::build::SyntaxDirectedTranslatorBuilder;
135pub use translator::sdt::SyntaxDirectedTranslator;
136
137mod lr_parser;
138mod translator;