// perplex/parser.rs

1// Copyright (c) 2018 Fabian Schuiki
2
3//! A parser for grammar descriptions.
4
5#![allow(unused_variables)]
6
7use std::str;
8use std::collections::HashMap;
9
10use lexer::{Keyword, Lexer, Token};
11use grammar::{Grammar, NonterminalId, Rule, Symbol, TerminalId, END};
12use backend::Backend;
13use perplex_runtime::Parser;
14
/// The terminal symbol type fed to the parser: a lexer token, with `None`
/// marking the end of the input stream.
type Terminal = Option<Token>;

// Splices in the generated parser tables; `state_0`, `reduced_0`, and the
// `Nonterminal` type used below presumably come from this file — confirm
// against the build script that emits it.
include!("parser_states.rs");
18
/// The abstract syntax tree of a grammar description.
pub mod ast {
    /// The root node of a grammar description.
    #[derive(Debug, Clone, PartialEq, Eq, Hash)]
    pub struct Desc {
        /// The token declarations.
        pub tokens: Vec<TokenDecl>,
        /// The rule declarations.
        pub rules: Vec<RuleDecl>,
    }

    /// An item in the grammar description.
    #[allow(missing_docs)]
    #[derive(Debug, Clone, PartialEq, Eq, Hash)]
    pub enum Item {
        TokenDecl(TokenDecl),
        RuleDecl(RuleDecl),
    }

    /// A token declaration.
    #[derive(Debug, Clone, PartialEq, Eq, Hash)]
    pub struct TokenDecl {
        /// The name of the token.
        pub name: TokenName,
        /// The match pattern for this token.
        pub pattern: Option<String>,
    }

    /// A token name.
    #[allow(missing_docs)]
    #[derive(Debug, Clone, PartialEq, Eq, Hash)]
    pub enum TokenName {
        End,
        Name(String),
    }

    /// A rule declaration.
    #[derive(Debug, Clone, PartialEq, Eq, Hash)]
    pub struct RuleDecl {
        /// The name of the rule.
        pub name: String,
        /// The type of the nonterminal a reduction of this rule produces.
        pub reduce_type: Option<String>,
        /// The different variants of the rule.
        pub variants: Vec<Variant>,
    }

    /// A rule variant.
    #[derive(Debug, Clone, PartialEq, Eq, Hash)]
    pub struct Variant {
        /// The sequence of symbols of the rule.
        pub seq: Vec<String>,
        /// The reduction function name of the rule.
        pub reduction_function: Option<String>,
    }
}
75
76fn reduce_desc_a(mut desc: ast::Desc, item: ast::Item) -> ast::Desc {
77    match item {
78        ast::Item::TokenDecl(d) => desc.tokens.push(d),
79        ast::Item::RuleDecl(d) => desc.rules.push(d),
80    }
81    desc
82}
83
84fn reduce_desc_b(item: ast::Item) -> ast::Desc {
85    match item {
86        ast::Item::TokenDecl(d) => ast::Desc {
87            tokens: vec![d],
88            rules: vec![],
89        },
90        ast::Item::RuleDecl(d) => ast::Desc {
91            tokens: vec![],
92            rules: vec![d],
93        },
94    }
95}
96
97fn reduce_desc_c(desc: ast::Desc, _semicolon: Option<Token>) -> ast::Desc {
98    desc
99}
100
101fn reduce_desc_d(_semicolon: Option<Token>) -> ast::Desc {
102    ast::Desc {
103        tokens: vec![],
104        rules: vec![],
105    }
106}
107
108fn reduce_item_a(token_decl: ast::TokenDecl) -> ast::Item {
109    ast::Item::TokenDecl(token_decl)
110}
111
112fn reduce_item_b(rule_decl: ast::RuleDecl) -> ast::Item {
113    ast::Item::RuleDecl(rule_decl)
114}
115
116fn reduce_token_decl_a(
117    _keyword: Option<Token>,
118    name: ast::TokenName,
119    _lparen: Option<Token>,
120    pattern: Option<Token>,
121    _rparen: Option<Token>,
122    _semicolon: Option<Token>,
123) -> ast::TokenDecl {
124    ast::TokenDecl {
125        name: name,
126        pattern: Some(pattern.unwrap().unwrap_code()),
127    }
128}
129
130fn reduce_token_decl_b(
131    _keyword: Option<Token>,
132    name: ast::TokenName,
133    _semicolon: Option<Token>,
134) -> ast::TokenDecl {
135    ast::TokenDecl {
136        name: name,
137        pattern: None,
138    }
139}
140
141fn reduce_token_name_a(name: Option<Token>) -> ast::TokenName {
142    ast::TokenName::Name(name.unwrap().unwrap_ident())
143}
144
145fn reduce_token_name_b(_end: Option<Token>) -> ast::TokenName {
146    ast::TokenName::End
147}
148
149fn reduce_rule_decl_a(
150    name: Option<Token>,
151    _lparen: Option<Token>,
152    reduce_type: Option<Token>,
153    _rparen: Option<Token>,
154    _lbrace: Option<Token>,
155    list: Vec<ast::Variant>,
156    rbrace: Option<Token>,
157) -> ast::RuleDecl {
158    ast::RuleDecl {
159        name: name.unwrap().unwrap_ident(),
160        reduce_type: Some(reduce_type.unwrap().unwrap_code()),
161        variants: list,
162    }
163}
164
165fn reduce_rule_decl_b(
166    name: Option<Token>,
167    _lbrace: Option<Token>,
168    list: Vec<ast::Variant>,
169    rbrace: Option<Token>,
170) -> ast::RuleDecl {
171    ast::RuleDecl {
172        name: name.unwrap().unwrap_ident(),
173        reduce_type: None,
174        variants: list,
175    }
176}
177
178fn reduce_rule_list_a(mut list: Vec<ast::Variant>, variant: ast::Variant) -> Vec<ast::Variant> {
179    list.push(variant);
180    list
181}
182
183fn reduce_rule_list_b(variant: ast::Variant) -> Vec<ast::Variant> {
184    vec![variant]
185}
186
187fn reduce_variant_a(
188    seq: Vec<String>,
189    _lparen: Option<Token>,
190    reduction_function: Option<Token>,
191    _rparen: Option<Token>,
192    _semicolon: Option<Token>,
193) -> ast::Variant {
194    ast::Variant {
195        seq: seq,
196        reduction_function: Some(reduction_function.unwrap().unwrap_code()),
197    }
198}
199
200fn reduce_variant_b(seq: Vec<String>, _semicolon: Option<Token>) -> ast::Variant {
201    ast::Variant {
202        seq: seq,
203        reduction_function: None,
204    }
205}
206
/// A nonempty symbol sequence passes through unchanged.
fn reduce_sequence_or_epsilon_a(symbols: Vec<String>) -> Vec<String> {
    symbols
}
210
211fn reduce_sequence_or_epsilon_b(_epsilon: Option<Token>) -> Vec<String> {
212    vec![]
213}
214
215fn reduce_sequence_a(mut seq: Vec<String>, symbol: Option<Token>) -> Vec<String> {
216    seq.push(symbol.unwrap().unwrap_ident());
217    seq
218}
219
220fn reduce_sequence_b(symbol: Option<Token>) -> Vec<String> {
221    vec![symbol.unwrap().unwrap_ident()]
222}
223
/// Zero-sized marker wiring this parser's generated states into the
/// `perplex_runtime` machinery.
struct StateSpace;

impl ::perplex_runtime::StateSpace for StateSpace {
    // Terminals are lexer tokens; `None` marks end-of-input.
    type Terminal = Option<Token>;
    // `Nonterminal` is presumably defined in the included
    // `parser_states.rs` — TODO confirm.
    type Nonterminal = Nonterminal;
    type Root = ();

    // Entry state function of the generated state machine.
    fn root_state_fn<P: Parser<Terminal = Option<Token>, Nonterminal = Nonterminal>>() -> fn(&mut P)
    {
        state_0
    }

    // Goto function invoked after a reduction back to the root state.
    fn root_goto_fn<P: Parser<Terminal = Option<Token>, Nonterminal = Nonterminal>>(
) -> fn(&mut P, Self::Nonterminal) {
        reduced_0
    }
}
241
242type CoreParser<I> = ::perplex_runtime::ParserMachine<I, StateSpace>;
243
/// Parse a sequence of tokens given by an iterator.
///
/// Runs the generated state machine to completion; `unwrap_nt0` presumably
/// extracts the root nonterminal's `ast::Desc` payload and panics on a
/// failed parse — TODO confirm against `perplex_runtime`.
pub fn parse_iter<I: Iterator<Item = Token>>(input: I) -> ast::Desc {
    CoreParser::from_iter(input).run().unwrap_nt0()
}
248
249/// Parse a string.
250///
251/// This first tokenizes the string and then parses it.
252pub fn parse_str<S: AsRef<str>>(input: S) -> ast::Desc {
253    let lex = Lexer::new(input.as_ref().char_indices());
254    parse_iter(lex.map(|(_, _, t)| t))
255}
256
/// Convert the grammar description into an actual grammar.
///
/// Produces the `Grammar` (terminals, nonterminals, rules) together with
/// the `Backend` (token patterns, reduce types, reduction function names).
///
/// # Panics
///
/// Panics if a rule name collides with a token name, or if a variant
/// sequence references a symbol that was never declared.
pub fn make_grammar(desc: &ast::Desc) -> (Grammar, Backend) {
    let mut grammar = Grammar::new();
    let mut backend = Backend::new();

    // Declare the terminals and nonterminals.
    let mut token_map: HashMap<String, TerminalId> = HashMap::new();
    let mut rule_map: HashMap<String, NonterminalId> = HashMap::new();
    for d in &desc.tokens {
        let id = match d.name {
            ast::TokenName::Name(ref name) => {
                let id = grammar.add_terminal(name.clone());
                // NOTE(review): a duplicate token name silently overwrites
                // the earlier map entry — confirm whether this should be
                // rejected like the rule/token conflict below.
                token_map.insert(name.clone(), id);
                id
            }
            // The end-of-input token maps to the predefined END terminal.
            ast::TokenName::End => END,
        };
        if let Some(pat) = d.pattern.clone() {
            backend.add_terminal(id, pat);
        }
    }
    for d in &desc.rules {
        // NOTE(review): duplicate rule names also overwrite silently here.
        let id = grammar.add_nonterminal(d.name.clone());
        rule_map.insert(d.name.clone(), id);
        if let Some(reduce_type) = d.reduce_type.clone() {
            backend.add_nonterminal(id, reduce_type);
        }
    }

    // Create a unified symbol lookup table. Tokens and rules share one
    // namespace so that variant sequences can reference either by name.
    let mut symbol_map: HashMap<String, Symbol> = HashMap::new();
    for (n, &i) in &token_map {
        symbol_map.insert(n.clone(), i.into());
    }
    for (n, &i) in &rule_map {
        // A rule reusing a token's name would be ambiguous in sequences.
        // (The message prints the same name twice — it is one shared name.)
        if symbol_map.insert(n.clone(), i.into()).is_some() {
            panic!("rule name `{}` conflicts with token name `{}`", n, n);
        }
    }

    // Add the rules to the grammar, resolving each symbol name in every
    // variant's sequence through the unified table.
    for d in &desc.rules {
        let id = rule_map[&d.name];
        for v in &d.variants {
            let seq = v.seq
                .iter()
                .map(|v| match symbol_map.get(v) {
                    Some(&s) => s,
                    None => panic!("unknown token or rule `{}`", v),
                })
                .collect();
            let rule_id = grammar.add_rule(Rule::new(id, seq));
            if let Some(rf) = v.reduction_function.clone() {
                backend.add_reduction_function(rule_id, rf);
            }
        }
    }

    (grammar, backend)
}
317
#[cfg(test)]
mod tests {
    use super::*;

    /// A lone pattern-less token declaration parses into a one-token AST.
    #[test]
    fn simple1() {
        let seq = [
            Token::Keyword(Keyword::Token),
            Token::Ident("hello".into()),
            Token::Semicolon,
        ];
        // Use `iter().cloned()` rather than `into_iter().cloned()`: the
        // latter only compiles via the pre-2021-edition array autoref
        // quirk and breaks once arrays iterate by value (edition 2021).
        let res = parse_iter(seq.iter().cloned());
        assert_eq!(
            res,
            ast::Desc {
                tokens: vec![
                    ast::TokenDecl {
                        name: ast::TokenName::Name("hello".into()),
                        pattern: None,
                    },
                ],
                rules: vec![],
            }
        );
    }
}