kbnf_syntax/
simplified_grammar.rs

1use std::fmt::Display;
2
3use serde::Serialize;
4use string_interner::{symbol::SymbolU32, Symbol};
5
6use crate::{
7    node::{OperatorFlattenedNode, Rhs}, regex::FiniteStateAutomaton, suffix_automaton::SuffixAutomaton, InternedStrings
8};
9
10#[derive(Clone)]
11pub struct SimplifiedGrammar {
12    pub expressions: Vec<Rhs>,
13    pub start_symbol: SymbolU32,
14    pub interned_strings: InternedStrings,
15    pub id_to_regex: Vec<FiniteStateAutomaton>,
16    pub id_to_suffix_automaton: Vec<SuffixAutomaton>,
17}
18
19impl SimplifiedGrammar {
20    pub fn is_empty(&self) -> bool {
21        self.expressions.is_empty()
22    }
23}
24
25impl Display for SimplifiedGrammar {
26    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
27        let mut buffer = String::new();
28        buffer.push_str(&format!(
29            "start_symbol: {}\n",
30            self.interned_strings
31                .nonterminals
32                .resolve(self.start_symbol)
33                .unwrap_or("None")
34        ));
35        for (lhs, rhs) in self.expressions.iter().enumerate() {
36            let lhs = self
37                .interned_strings
38                .nonterminals
39                .resolve(SymbolU32::try_from_usize(lhs).unwrap())
40                .unwrap();
41            buffer.push_str(lhs);
42            buffer.push_str(" ::= ");
43            for (j, alternation) in rhs.alternations.iter().enumerate() {
44                for (i, concatenation) in alternation.concatenations.iter().enumerate() {
45                    match concatenation {
46                        OperatorFlattenedNode::Terminal(value) => {
47                            let value = self.interned_strings.terminals.resolve(*value).unwrap();
48                            buffer.push_str(&format!("'{}'", value));
49                        }
50                        OperatorFlattenedNode::Substrings(value) => {
51                            let value = self.interned_strings.sub_strings.resolve(*value).unwrap();
52                            buffer.push_str(&format!("#substrs\"{}\"", value));
53                        }
54                        OperatorFlattenedNode::RegexString(value) => {
55                            let value =
56                                self.interned_strings.regex_strings.resolve(*value).unwrap();
57                            buffer.push_str(&format!("#\"{}\"", value));
58                        }
59                        OperatorFlattenedNode::EarlyEndRegexString(value) => {
60                            let value =
61                                self.interned_strings.regex_strings.resolve(*value).unwrap();
62                            buffer.push_str(&format!("#e\"{}\"", value));
63                        }
64                        OperatorFlattenedNode::RegexComplement(value) => {
65                            let value =
66                                self.interned_strings.regex_strings.resolve(*value).unwrap();
67                            buffer.push_str(&format!("#ex\"{}\"", value));
68                        }
69                        OperatorFlattenedNode::Nonterminal(value) => {
70                            let value = self.interned_strings.nonterminals.resolve(*value).unwrap();
71                            buffer.push_str(value);
72                        }
73                    }
74                    if i + 1 < alternation.concatenations.len() {
75                        buffer.push(' ');
76                    }
77                }
78                if j + 1 < rhs.alternations.len() {
79                    buffer.push_str(" | ");
80                }
81            }
82            buffer.push_str(";\n");
83        }
84        write!(f, "{}", buffer)
85    }
86}
87
88impl std::fmt::Debug for SimplifiedGrammar {
89    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
90        let mut buffer = String::new();
91        buffer.push_str(&format!(
92            "start_symbol: {}(ID: {})\n",
93            self.interned_strings
94                .nonterminals
95                .resolve(self.start_symbol)
96                .unwrap_or("None"),
97            self.start_symbol.to_usize()
98        ));
99        for (lhs, rhs) in self.expressions.iter().enumerate() {
100            let lhs = self
101                .interned_strings
102                .nonterminals
103                .resolve(SymbolU32::try_from_usize(lhs).unwrap())
104                .unwrap();
105            buffer.push_str(lhs);
106            buffer.push_str(" ::= ");
107            for (j, alternation) in rhs.alternations.iter().enumerate() {
108                for (i, concatenation) in alternation.concatenations.iter().enumerate() {
109                    match concatenation {
110                        OperatorFlattenedNode::Terminal(value) => {
111                            let terminal = self.interned_strings.terminals.resolve(*value).unwrap();
112                            buffer.push_str(&format!("'{}'(ID: {})", terminal, value.to_usize()));
113                        }
114                        OperatorFlattenedNode::Substrings(value) => {
115                            let substrings =
116                                self.interned_strings.sub_strings.resolve(*value).unwrap();
117                            buffer.push_str(&format!(
118                                "#substrs\"{}\"(ID: {})",
119                                substrings,
120                                value.to_usize()
121                            ));
122                        }
123                        OperatorFlattenedNode::RegexString(value) => {
124                            let regex =
125                                self.interned_strings.regex_strings.resolve(*value).unwrap();
126                            let regex_type = match self.id_to_regex[value.to_usize()] {
127                                FiniteStateAutomaton::Dfa(_) => "DFA",
128                            };
129                            buffer.push_str(&format!(
130                                "#\"{}\"(ID: {},type: {})",
131                                regex,
132                                value.to_usize(),
133                                regex_type
134                            ));
135                        }
136                        OperatorFlattenedNode::EarlyEndRegexString(value) => {
137                            let regex =
138                                self.interned_strings.regex_strings.resolve(*value).unwrap();
139                            let regex_type = match self.id_to_regex[value.to_usize()] {
140                                FiniteStateAutomaton::Dfa(_) => "DFA",
141                            };
142                            buffer.push_str(&format!(
143                                "#e\"{}\"(ID: {},type: {})",
144                                regex,
145                                value.to_usize(),
146                                regex_type
147                            ));
148                        }
149                        OperatorFlattenedNode::RegexComplement(value) => {
150                            let regex =
151                                self.interned_strings.regex_strings.resolve(*value).unwrap();
152                            let regex_type = match self.id_to_regex[value.to_usize()] {
153                                FiniteStateAutomaton::Dfa(_) => "DFA",
154                            };
155                            buffer.push_str(&format!(
156                                "#ex\"{}\"(ID: {},type: {})",
157                                regex,
158                                value.to_usize(),
159                                regex_type
160                            ));
161                        }
162                        OperatorFlattenedNode::Nonterminal(value) => {
163                            let nonterminal =
164                                self.interned_strings.nonterminals.resolve(*value).unwrap();
165                            buffer.push_str(&format!("{}(ID: {})", nonterminal, value.to_usize()));
166                        }
167                    }
168                    if i + 1 < alternation.concatenations.len() {
169                        buffer.push(' ');
170                    }
171                }
172                if j + 1 < rhs.alternations.len() {
173                    buffer.push_str(" | ");
174                }
175            }
176            buffer.push_str(";\n");
177        }
178        write!(f, "{}", buffer)
179    }
180}
181
182impl Serialize for SimplifiedGrammar {
183    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
184    where
185        S: serde::Serializer,
186    {
187        serializer.collect_str(&self)
188    }
189}