kbnf_syntax/
simplified_grammar.rs1use std::fmt::Display;
2
3use serde::Serialize;
4use string_interner::{symbol::SymbolU32, Symbol};
5
6use crate::{
7 node::{OperatorFlattenedNode, Rhs}, regex::FiniteStateAutomaton, suffix_automaton::SuffixAutomaton, InternedStrings
8};
9
10#[derive(Clone)]
11pub struct SimplifiedGrammar {
12 pub expressions: Vec<Rhs>,
13 pub start_symbol: SymbolU32,
14 pub interned_strings: InternedStrings,
15 pub id_to_regex: Vec<FiniteStateAutomaton>,
16 pub id_to_suffix_automaton: Vec<SuffixAutomaton>,
17}
18
19impl SimplifiedGrammar {
20 pub fn is_empty(&self) -> bool {
21 self.expressions.is_empty()
22 }
23}
24
25impl Display for SimplifiedGrammar {
26 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
27 let mut buffer = String::new();
28 buffer.push_str(&format!(
29 "start_symbol: {}\n",
30 self.interned_strings
31 .nonterminals
32 .resolve(self.start_symbol)
33 .unwrap_or("None")
34 ));
35 for (lhs, rhs) in self.expressions.iter().enumerate() {
36 let lhs = self
37 .interned_strings
38 .nonterminals
39 .resolve(SymbolU32::try_from_usize(lhs).unwrap())
40 .unwrap();
41 buffer.push_str(lhs);
42 buffer.push_str(" ::= ");
43 for (j, alternation) in rhs.alternations.iter().enumerate() {
44 for (i, concatenation) in alternation.concatenations.iter().enumerate() {
45 match concatenation {
46 OperatorFlattenedNode::Terminal(value) => {
47 let value = self.interned_strings.terminals.resolve(*value).unwrap();
48 buffer.push_str(&format!("'{}'", value));
49 }
50 OperatorFlattenedNode::Substrings(value) => {
51 let value = self.interned_strings.sub_strings.resolve(*value).unwrap();
52 buffer.push_str(&format!("#substrs\"{}\"", value));
53 }
54 OperatorFlattenedNode::RegexString(value) => {
55 let value =
56 self.interned_strings.regex_strings.resolve(*value).unwrap();
57 buffer.push_str(&format!("#\"{}\"", value));
58 }
59 OperatorFlattenedNode::EarlyEndRegexString(value) => {
60 let value =
61 self.interned_strings.regex_strings.resolve(*value).unwrap();
62 buffer.push_str(&format!("#e\"{}\"", value));
63 }
64 OperatorFlattenedNode::RegexComplement(value) => {
65 let value =
66 self.interned_strings.regex_strings.resolve(*value).unwrap();
67 buffer.push_str(&format!("#ex\"{}\"", value));
68 }
69 OperatorFlattenedNode::Nonterminal(value) => {
70 let value = self.interned_strings.nonterminals.resolve(*value).unwrap();
71 buffer.push_str(value);
72 }
73 }
74 if i + 1 < alternation.concatenations.len() {
75 buffer.push(' ');
76 }
77 }
78 if j + 1 < rhs.alternations.len() {
79 buffer.push_str(" | ");
80 }
81 }
82 buffer.push_str(";\n");
83 }
84 write!(f, "{}", buffer)
85 }
86}
87
88impl std::fmt::Debug for SimplifiedGrammar {
89 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
90 let mut buffer = String::new();
91 buffer.push_str(&format!(
92 "start_symbol: {}(ID: {})\n",
93 self.interned_strings
94 .nonterminals
95 .resolve(self.start_symbol)
96 .unwrap_or("None"),
97 self.start_symbol.to_usize()
98 ));
99 for (lhs, rhs) in self.expressions.iter().enumerate() {
100 let lhs = self
101 .interned_strings
102 .nonterminals
103 .resolve(SymbolU32::try_from_usize(lhs).unwrap())
104 .unwrap();
105 buffer.push_str(lhs);
106 buffer.push_str(" ::= ");
107 for (j, alternation) in rhs.alternations.iter().enumerate() {
108 for (i, concatenation) in alternation.concatenations.iter().enumerate() {
109 match concatenation {
110 OperatorFlattenedNode::Terminal(value) => {
111 let terminal = self.interned_strings.terminals.resolve(*value).unwrap();
112 buffer.push_str(&format!("'{}'(ID: {})", terminal, value.to_usize()));
113 }
114 OperatorFlattenedNode::Substrings(value) => {
115 let substrings =
116 self.interned_strings.sub_strings.resolve(*value).unwrap();
117 buffer.push_str(&format!(
118 "#substrs\"{}\"(ID: {})",
119 substrings,
120 value.to_usize()
121 ));
122 }
123 OperatorFlattenedNode::RegexString(value) => {
124 let regex =
125 self.interned_strings.regex_strings.resolve(*value).unwrap();
126 let regex_type = match self.id_to_regex[value.to_usize()] {
127 FiniteStateAutomaton::Dfa(_) => "DFA",
128 };
129 buffer.push_str(&format!(
130 "#\"{}\"(ID: {},type: {})",
131 regex,
132 value.to_usize(),
133 regex_type
134 ));
135 }
136 OperatorFlattenedNode::EarlyEndRegexString(value) => {
137 let regex =
138 self.interned_strings.regex_strings.resolve(*value).unwrap();
139 let regex_type = match self.id_to_regex[value.to_usize()] {
140 FiniteStateAutomaton::Dfa(_) => "DFA",
141 };
142 buffer.push_str(&format!(
143 "#e\"{}\"(ID: {},type: {})",
144 regex,
145 value.to_usize(),
146 regex_type
147 ));
148 }
149 OperatorFlattenedNode::RegexComplement(value) => {
150 let regex =
151 self.interned_strings.regex_strings.resolve(*value).unwrap();
152 let regex_type = match self.id_to_regex[value.to_usize()] {
153 FiniteStateAutomaton::Dfa(_) => "DFA",
154 };
155 buffer.push_str(&format!(
156 "#ex\"{}\"(ID: {},type: {})",
157 regex,
158 value.to_usize(),
159 regex_type
160 ));
161 }
162 OperatorFlattenedNode::Nonterminal(value) => {
163 let nonterminal =
164 self.interned_strings.nonterminals.resolve(*value).unwrap();
165 buffer.push_str(&format!("{}(ID: {})", nonterminal, value.to_usize()));
166 }
167 }
168 if i + 1 < alternation.concatenations.len() {
169 buffer.push(' ');
170 }
171 }
172 if j + 1 < rhs.alternations.len() {
173 buffer.push_str(" | ");
174 }
175 }
176 buffer.push_str(";\n");
177 }
178 write!(f, "{}", buffer)
179 }
180}
181
182impl Serialize for SimplifiedGrammar {
183 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
184 where
185 S: serde::Serializer,
186 {
187 serializer.collect_str(&self)
188 }
189}