bnf_syntax_parser/
bnf_rules.rs

1// use crate::syntex_analysis::Pa?rseTree;
2
3use std::{collections::HashMap, fmt::{Display, Write}};
4
5use super::{error::{ParseError, ParseErrorType, RuleParseError}, DescriptionIndex, MatchRecord, ParseTree, ParseTreeNode, Parser, RuleIndex, Token, TokenIndex, TokenType};
6
7#[derive(Clone, Debug)]
8pub enum BNFDescription<TT> {
9    Token(TT),
10    Tokens(Vec<TT>), // TODO Only for special case, it won't be automatically generated by bnf rule parser.
11    ExceptTokens(Vec<TT>), // Only for special case, it won't be automatically generated by bnf rule parser.
12    ChoiceStart(DescriptionIndex, DescriptionIndex), // NextChoiceMid, ChoiceEnd
13    ChoiceMidEnd(DescriptionIndex), // ChoiceEnd
14    ChoiceMidStart(DescriptionIndex, DescriptionIndex), // NextChoiceMid, ChoiceEnd
15    ChoiceEnd,
16    OptionStart(DescriptionIndex), // OptionEnd
17    OptionEnd,
18    RepetitionStart(DescriptionIndex), // RepetitionEnd
19    RepetitionEnd(DescriptionIndex), // RepetitionStart
20    Rule(RuleIndex),
21}
22
23impl<TT> BNFDescription<TT> {
24    fn try_into<TT2, E>(self) -> Result<BNFDescription<TT2>, E> where TT2: TryFrom<TT, Error = E> {
25        Ok(match self {
26            BNFDescription::Token(token_type) => BNFDescription::Token(token_type.try_into()?),
27            BNFDescription::Tokens(token_types) => {
28                let mut tt2_token_types = Vec::with_capacity(token_types.len());
29                for token_type in token_types {
30                    tt2_token_types.push(token_type.try_into()?);
31                }
32                BNFDescription::Tokens(tt2_token_types)
33            },
34            BNFDescription::ExceptTokens(token_types) => {
35                let mut tt2_token_types = Vec::with_capacity(token_types.len());
36                for token_type in token_types {
37                    tt2_token_types.push(token_type.try_into()?);
38                }
39                BNFDescription::ExceptTokens(tt2_token_types)
40            },
41            BNFDescription::ChoiceStart(NextChoiceMid, ChoiceEnd) => BNFDescription::ChoiceStart(NextChoiceMid, ChoiceEnd),
42            BNFDescription::ChoiceMidEnd(ChoiceEnd) => BNFDescription::ChoiceMidEnd(ChoiceEnd),
43            BNFDescription::ChoiceMidStart(NextChoiceMid, ChoiceEnd) => BNFDescription::ChoiceMidStart(NextChoiceMid, ChoiceEnd),
44            BNFDescription::ChoiceEnd => BNFDescription::ChoiceEnd,
45            BNFDescription::OptionStart(OptionEnd) => BNFDescription::OptionStart(OptionEnd),
46            BNFDescription::OptionEnd => BNFDescription::OptionEnd,
47            BNFDescription::RepetitionStart(RepetitionEnd) => BNFDescription::RepetitionStart(RepetitionEnd),
48            BNFDescription::RepetitionEnd(RepetitionStart) => BNFDescription::RepetitionEnd(RepetitionStart),
49            BNFDescription::Rule(rule_index) => BNFDescription::Rule(rule_index),
50        })
51    }
52    fn ignore_token_into<TT2>(&self) -> Option<BNFDescription<TT2>> {
53        Some(match self {
54            BNFDescription::Token(..) | BNFDescription::Tokens(..) | BNFDescription::ExceptTokens(..) => return None,
55            BNFDescription::ChoiceStart(NextChoiceMid, ChoiceEnd) => BNFDescription::ChoiceStart(*NextChoiceMid, *ChoiceEnd),
56            BNFDescription::ChoiceMidEnd(ChoiceEnd) => BNFDescription::ChoiceMidEnd(*ChoiceEnd),
57            BNFDescription::ChoiceMidStart(NextChoiceMid, ChoiceEnd) => BNFDescription::ChoiceMidStart(*NextChoiceMid, *ChoiceEnd),
58            BNFDescription::ChoiceEnd => BNFDescription::ChoiceEnd,
59            BNFDescription::OptionStart(OptionEnd) => BNFDescription::OptionStart(*OptionEnd),
60            BNFDescription::OptionEnd => BNFDescription::OptionEnd,
61            BNFDescription::RepetitionStart(RepetitionEnd) => BNFDescription::RepetitionStart(*RepetitionEnd),
62            BNFDescription::RepetitionEnd(RepetitionStart) => BNFDescription::RepetitionEnd(*RepetitionStart),
63            BNFDescription::Rule(rule_index) => BNFDescription::Rule(*rule_index),
64        })
65    }
66}
67
68#[derive(Clone, Debug)]
69pub struct BNFRule<TT> {
70    pub index: RuleIndex,
71    pub description: Vec<BNFDescription<TT>>,
72}
73
74impl<TT> BNFRule<TT> {
75    pub const fn new(index: RuleIndex, description: Vec<BNFDescription<TT>>) -> Self {
76        Self { index, description }
77    }
78    pub fn set_index(mut self) -> Result<Self, ParseError> {
79        let description_types = self.description.iter().map(
80            |d| d.ignore_token_into().unwrap_or(BNFDescription::Token(()))
81        ).collect::<Vec<BNFDescription<()>>>();
82        let mut stack = Vec::new();
83        let mut choice_stack = Vec::new();
84        for (i, d) in description_types.iter().enumerate() {
85            match d {
86                BNFDescription::ChoiceStart(..) | BNFDescription::ChoiceMidStart(..) => {
87                    stack.push(i);
88                    choice_stack.push(i);
89                },
90                BNFDescription::OptionStart(..) | BNFDescription::RepetitionStart(..) => {
91                    stack.push(i);
92                },
93                BNFDescription::ChoiceMidEnd(ChoiceEnd) => {
94                    choice_stack.push(i);
95                    match self.description.get_mut(stack.pop().unwrap()).unwrap() {
96                        BNFDescription::ChoiceStart(NextChoiceMid, ChoiceEnd) => *NextChoiceMid = i + 1,
97                        BNFDescription::ChoiceMidStart(NextChoiceMid, ChoiceEnd) => *NextChoiceMid = i + 1,
98                        _ => panic!(),
99                    }
100                    match self.description.get(i+1).unwrap() {
101                        BNFDescription::ChoiceMidStart(..) => (),
102                        _ => panic!(),
103                        // return Err(RuleParseError::UnexpectedRuleDescription(self.index, i+1, BNFDescription::ChoiceMidStart(0, 0)))
104                    };
105                },
106                BNFDescription::ChoiceEnd => {
107                    match self.description.get_mut(stack.pop().unwrap()).unwrap() {
108                        BNFDescription::ChoiceStart(NextChoiceMid, ChoiceEnd) => *NextChoiceMid = i,
109                        BNFDescription::ChoiceMidStart(NextChoiceMid, ChoiceEnd) => *NextChoiceMid = i,
110                        _ => panic!(),
111                    }
112                    loop {
113                        match self.description.get_mut(choice_stack.pop().unwrap()).unwrap() {
114                            BNFDescription::ChoiceStart(NextChoiceMid, ChoiceEnd) => {*ChoiceEnd = i; break},
115                            BNFDescription::ChoiceMidStart(NextChoiceMid, ChoiceEnd) => *ChoiceEnd = i,
116                            BNFDescription::ChoiceMidEnd(ChoiceEnd) => *ChoiceEnd = i,
117                            _ => panic!(),
118                        }
119                    }
120                },
121                BNFDescription::OptionEnd => match self.description.get_mut(stack.pop().unwrap()).unwrap() {
122                    BNFDescription::OptionStart(OptionEnd) => *OptionEnd = i,
123                    _ => panic!(),
124                },
125                BNFDescription::RepetitionEnd(..) => {
126                    let repetition_start = stack.pop().unwrap();
127                    match self.description.get_mut(i).unwrap() {
128                        BNFDescription::RepetitionEnd(RepetitionStart) => *RepetitionStart = repetition_start,
129                        _ => panic!()
130                    };
131                    match self.description.get_mut(repetition_start).unwrap() {
132                        BNFDescription::RepetitionStart(RepetitionEnd) => *RepetitionEnd = i,
133                        _ => panic!(),
134                    };
135                },
136                _ => (),
137            }
138        }
139        assert!(stack.is_empty() && choice_stack.is_empty());
140        Ok(self)
141    }
142}
143
144#[derive(Clone, Debug)]
145pub struct BNFRules<TT> {
146    pub rules: Vec<BNFRule<TT>>,
147    pub start_rule_index: Option<RuleIndex>,
148    pub rules_index: HashMap<String, RuleIndex>,
149    pub rules_identifier: Vec<String>,
150}
151
152impl<TT> Default for BNFRules<TT> {
153    fn default() -> Self {
154        Self { rules: Vec::new(), start_rule_index: None, rules_index: HashMap::new(), rules_identifier: Vec::new() }
155    }
156}
157
158impl<TT> BNFRules<TT> where TT: TokenType {
159    pub fn new<GetTokenType>(rules_vec_char: &Vec<char>, get_token_type: GetTokenType, start_rule_identifier: &String) -> Result<Self, RuleParseError>
160    where GetTokenType: Fn(&String) -> Option<TT> {
161        let mut bnf_rules = Self { rules: Vec::new(), start_rule_index: None, rules_index: HashMap::new(), rules_identifier: Vec::new() };
162        bnf_rules.compile_bnf_rules(rules_vec_char, get_token_type)?;
163        bnf_rules.start_rule_index = bnf_rules.rules_index.get(start_rule_identifier).cloned();
164        Ok(bnf_rules)
165    }
166    pub fn compile_bnf_rules<GetTokenType>(&mut self, rules_vec_char: &Vec<char>, get_token_type: GetTokenType) -> Result<(), RuleParseError> where GetTokenType: Fn(&String) -> Option<TT> {
167        self.rules_index.clear();
168        self.rules_identifier.clear();
169        let rules_parse_tree = Self::parse_rules_vec_char(rules_vec_char).map_err(|e| RuleParseError::ParseError(e))?;
170        // rules_parse_tree.display(|token_index| Some(rules_vec_char[token_index]), |rule_index| Some(rule_index)).unwrap();
171        // return Ok(());
172        let rules_char = Self::set_rule_index(rules_vec_char, rules_parse_tree, &mut self.rules_index, &mut self.rules_identifier);
173        self.rules.clear();
174        self.rules.reserve(rules_char.len());
175        for rule_char in rules_char {
176            self.rules.push(BNFRule {
177                index: rule_char.index,
178                description: {
179                    let mut description = Vec::with_capacity(rule_char.description.len());
180                    for d in rule_char.description {
181                        description.push(match d {
182                            BNFDescription::Tokens(identifier) => {
183                                let identifier = identifier.iter().collect::<String>();
184                                match (get_token_type(&identifier), self.rules_index.get(&identifier)) {
185                                    (Some(_), Some(_)) => return Err(RuleParseError::IdentifierMultipleDefinition(identifier)),
186                                    (Some(tt), None) => BNFDescription::Token(tt),
187                                    (None, Some(rule_index)) => BNFDescription::Rule(*rule_index),
188                                    (None, None) => return Err(RuleParseError::IdentifierDefinitionNotFound(identifier)),
189                                }
190                            }
191                            _ => d.ignore_token_into().unwrap(),
192                        });
193                    }
194                    description
195                }
196            }.set_index().map_err(|e| RuleParseError::ParseError(e))?);
197        }
198        Ok(())
199    }
200    fn parse_rules_vec_char(rules_vec_char: &Vec<char>) -> Result<ParseTree, ParseError> {
201        let bnf_compiler_rules = super::rules_parser_generator::RulesParserGenerator::default().generate_bnf_rules_parser_rules();
202        // super::debug_recorder::new_recorder();
203        let match_result =
204            Parser::try_from(&bnf_compiler_rules).unwrap()
205            // super::rules_parser_generator::RulesParserGenerator::default().generate_bnf_rules_parser()
206        .parse(rules_vec_char).unwrap()?;
207        /* super::debug_recorder::active_recorder().unwrap().display(&bnf_compiler_rules,
208            |token_index| rules_vec_char.get(token_index).map(|c| if c.is_whitespace() {'~'} else {*c}),
209            |token_type| Some(if token_type.is_whitespace() {'~'} else {*token_type})
210        ).unwrap(); */
211        Ok(ParseTree::new(&match_result).unwrap())
212    }
213    fn set_rule_index(rules_vec_char: &Vec<char>, rules_parse_tree: ParseTree, rules_index: &mut HashMap<String, RuleIndex>, rules_identifier: &mut Vec<String>) -> Vec<BNFRule<char>> {
214        rules_parse_tree.sub_nodes.into_iter().filter_map(
215            |sub_node| if let ParseTreeNode::ParseTree(rule_parse_tree) = sub_node {
216                if rule_parse_tree.rule_index == super::rules_parser_generator::BNF_RULE {
217                    Some(Self::parse_rule_parse_tree(&rules_vec_char, rule_parse_tree, rules_index, rules_identifier))
218                } else { None }
219            } else { None }
220        ).collect::<Vec<BNFRule<char>>>()
221    }
222    fn parse_rule_parse_tree(rules_vec_char: &Vec<char>, rule_parse_tree: ParseTree, rules_index: &mut HashMap<String, RuleIndex>, rules_identifier: &mut Vec<String>) -> BNFRule<char> {
223        macro_rules! IdentifierFromParseTree {
224            ($identifier_parse_tree: expr) => {
225                $identifier_parse_tree.sub_nodes.iter().filter_map(|sub_node| match sub_node {
226                    ParseTreeNode::Token(identifier_token_index) => {
227                        rules_vec_char.get(*identifier_token_index).copied()
228                    },
229                    _ => panic!(),
230                }).collect::<Vec<char>>()
231            };
232        }
233        let mut state = 0;
234        let mut bnf_rule = BNFRule { index: 0, description: Vec::new() };
235        assert!(rule_parse_tree.rule_index == super::rules_parser_generator::BNF_RULE);
236        for description in rule_parse_tree.sub_nodes {
237            match state {
238                0 => match description {
239                    ParseTreeNode::ParseTree(identifier_parse_tree) => {
240                        assert!(identifier_parse_tree.rule_index == super::rules_parser_generator::IDENTIFIER);
241                        let identifier = IdentifierFromParseTree!(identifier_parse_tree).iter().collect::<String>();
242                        match rules_index.get(&identifier) {
243                            Some(rule_index) => bnf_rule.index = *rule_index,
244                            None => {
245                                bnf_rule.index = rules_identifier.len();
246                                rules_index.insert(identifier.clone(), rules_identifier.len());
247                                rules_identifier.push(identifier);
248                            },
249                        }
250                        state += 1;
251                    },
252                    _ => panic!("rule_parse_tree.rule_index: {}", rule_parse_tree.rule_index), // 第一个一定是 ParseTree BNF_RULE
253                },
254                1 => match description {
255                    ParseTreeNode::Token(equal_sign) => if *rules_vec_char.get(equal_sign).unwrap() == '=' { state += 1; },
256                    _ => (), // 无视空白字符
257                },
258                2 => match description {
259                    ParseTreeNode::Token(token_index) => match *rules_vec_char.get(token_index).unwrap() {
260                        '(' => bnf_rule.description.push(BNFDescription::ChoiceStart(0, 0)),
261                        '|' => bnf_rule.description.extend_from_slice(&[BNFDescription::ChoiceMidEnd(0), BNFDescription::ChoiceMidStart(0, 0)]),
262                        ')' => bnf_rule.description.push(BNFDescription::ChoiceEnd),
263                        '[' => bnf_rule.description.push(BNFDescription::OptionStart(0)),
264                        ']' => bnf_rule.description.push(BNFDescription::OptionEnd),
265                        '{' => bnf_rule.description.push(BNFDescription::RepetitionStart(0)),
266                        '}' => bnf_rule.description.push(BNFDescription::RepetitionEnd(0)),
267                        // ';' => break;
268                        _ => (), // 无视空白字符
269                    },
270                    ParseTreeNode::ParseTree(identifier_parse_tree) => {
271                        match identifier_parse_tree.rule_index {
272                            super::rules_parser_generator::IDENTIFIER => {
273                                let identifier = IdentifierFromParseTree!(identifier_parse_tree);
274                                bnf_rule.description.push(BNFDescription::Tokens(identifier));
275                            },
276                            super::rules_parser_generator::STRING => {
277                                let mut identifier = IdentifierFromParseTree!(identifier_parse_tree);
278                                identifier.pop();
279                                identifier.remove(0);
280                                bnf_rule.description.push(BNFDescription::Tokens(identifier));
281                            },
282                            _ => (), // 无视空白字符
283                        }
284                    }
285                }
286                _ => panic!(), // 不可能有其他状态了
287            }
288        }
289        bnf_rule
290    }
291
292}
293