1#[cfg(any())]
4use std::collections::HashMap;
5use crate::{NameFixer, indent_source};
6use crate::fixed_sym_table::{FixedSymTable, SymInfoTable};
7use crate::{TokenId, VarId};
8use crate::parser::Symbol;
9use lexigram_core::CollectJoin;
10
11#[derive(Clone, Debug)]
58pub struct SymbolTable {
59 t: Vec<(String, Option<String>)>, fixer_t: NameFixer, nt: Vec<String>, #[cfg(any())]
63 names: HashMap<String, VarId>, fixer_nt: NameFixer, }
66
67impl SymbolTable {
68 pub fn new() -> Self {
69 SymbolTable {
70 t: Vec::new(),
71 fixer_t: NameFixer::new_empty(),
72 nt: Vec::new(),
73 #[cfg(any())]
74 names: HashMap::new(),
75 fixer_nt: NameFixer::new_empty(),
76 }
77 }
78
79 pub fn to_fixed_sym_table(self) -> FixedSymTable {
80 FixedSymTable::new(self.t, self.nt)
81 }
82
83 pub fn add_terminal<T: Into<String>>(&mut self, name: T, value_maybe: Option<T>) -> TokenId {
86 let token = self.t.len();
87 assert!(token < TokenId::MAX as usize);
88 let unique_name = self.fixer_t.get_unique_name(name.into());
89 self.t.push((unique_name, value_maybe.map(|n| n.into())));
90 token as TokenId
91 }
92
93 pub fn extend_terminals<I: IntoIterator<Item=(T, Option<T>)>, T: Into<String>>(&mut self, terminals: I) {
94 for (s, maybe) in terminals {
95 self.add_terminal(s, maybe);
96 }
97 }
98
99 pub fn get_terminals(&self) -> impl Iterator<Item = &(String, Option<String>)> {
100 self.t.iter()
101 }
102
103 pub fn get_num_t(&self) -> usize {
104 self.t.len()
105 }
106
107 pub fn set_t_value(&mut self, token: TokenId, name_maybe: Option<String>) {
108 self.t[token as usize].1 = name_maybe
109 }
110
111 pub fn downsize_num_t(&mut self, num_t: usize) {
112 if num_t < self.t.len() {
113 for (name, _) in &mut self.t[num_t..] {
114 self.fixer_t.remove(name);
115 }
116 self.t.resize(num_t, (String::new(), None));
117 }
118 }
119
120 fn add_nt(&mut self, unique_name: String) -> VarId {
123 let var = self.nt.len();
124 assert!(var < VarId::MAX as usize);
125 #[cfg(any())]
126 self.names.insert(unique_name.clone(), var as VarId);
127 self.nt.push(unique_name);
128 var as VarId
129 }
130
131 pub fn add_nonterminal<T: Into<String>>(&mut self, name: T) -> VarId {
132 let unique_name = self.fixer_nt.get_unique_name(name.into());
133 self.add_nt(unique_name)
134 }
135
136 pub fn add_child_nonterminal(&mut self, var: VarId) -> VarId {
137 let unique_name = self.fixer_nt.get_unique_name_unum(self.nt[var as usize].clone());
138 self.add_nt(unique_name)
139 }
140
141 pub fn extend_nonterminals<I: IntoIterator<Item=T>, T: Into<String>>(&mut self, nonterminals: I) {
142 for s in nonterminals {
143 self.add_nonterminal(s);
144 }
145 }
146
147 #[cfg(any())]
148 pub fn find_nonterminal(&self, name: &str) -> Option<VarId> {
149 self.names.get(name).cloned()
150 }
151
152 pub fn get_nonterminals(&self) -> impl Iterator<Item = &String> {
153 self.nt.iter()
154 }
155
156 pub fn get_num_nt(&self) -> usize {
157 self.nt.len()
158 }
159
160 pub fn remove_nonterminal(&mut self, v: VarId) {
161 let name = self.nt.remove(v as usize);
162 #[cfg(any())]
163 {
164 for old_v in self.names.values_mut() {
165 if *old_v >= v { *old_v -= 1; }
166 }
167 self.names.remove(&name);
168 }
169 self.fixer_nt.remove(&name);
170 }
171
172 pub fn set_nt_name(&mut self, var: VarId, name: String) {
173 self.nt[var as usize] = name;
174 }
175
176 pub fn find_nt(&self, name: &str) -> Option<VarId> {
177 self.nt.iter().position(|s| s == name).map(|n| n as VarId)
178 }
179
180 pub fn remove_nt_name(&mut self, var: VarId) -> String {
183 let mut removed = self.fixer_nt.get_unique_name_num(format!("{var}_removed"));
184 std::mem::swap(&mut self.nt[var as usize], &mut removed);
185 #[cfg(any())]
186 self.names.remove(&removed);
187 self.fixer_nt.remove(&removed);
188 removed
189 }
190
191 pub fn gen_source_code_t(&self, indent: usize, has_enum: bool, has_array: bool) -> String {
192 let mut source = Vec::<String>::new();
193 let (max_n, max_option) = self.get_terminals().fold((0, 0), |(n, option), t| {
194 (n.max(t.0.len()), option.max(t.1.as_ref().map_or(0, |o| o.len())))
195 });
196 if has_enum {
197 source.push("#[repr(u16)]".to_string());
200 source.push("enum T {".to_string());
201 for (i, (n, _option)) in self.get_terminals().enumerate() {
202 source.push(format!(" {n:max_n$}{} // {i}", if i == 0 { " = 0," } else { ", " }));
203 }
204 source.push("}".to_string());
205 }
206 if has_enum && has_array {
207 source.push(String::new());
208 }
209 if has_array {
210 source.push(format!("static TERMINALS: [(&str, Option<&str>); {}] = [", self.get_num_t()));
213 for (i, (n, option)) in self.get_terminals().enumerate() {
214 source.push(format!(" (\"{n}\",{:w1$}{}),{:w2$} // {i}",
215 "",
216 if let Some(o) = option { format!("Some(\"{o}\")") } else { "None".to_string() },
217 "",
218 w1 = max_n - n.len(), w2 = max_option + 4 - option.as_ref().map_or(0, |o| o.len() + 4)));
219 }
220 source.push("];".to_string());
221 }
222 indent_source(vec![source], indent)
223 }
224
225 pub fn dump_str(&self) -> String {
228 let mut str = format!(
229 " - nonterminals:\n{}\n",
230 self.get_nonterminals().enumerate().map(|(v, s)| format!(" - NT[{v}]: {s}")).join("\n"));
231 str.push_str(&format!(
232 " - terminals:\n{}",
233 self.get_terminals().enumerate()
234 .map(|(t, (n, v_maybe))| format!(" - T[{t}]: {n}{}", if let Some(v) = v_maybe { format!(" = {v:?}") } else { String::new() }))
235 .join("\n")));
236 str
237 }
238
239 pub fn dump(&self, title: &str) {
240 if !title.is_empty() {
241 println!("{title}");
242 }
243 println!("{}", self.dump_str());
244 }
245}
246
247impl SymInfoTable for SymbolTable {
248 fn is_token_data(&self, token: TokenId) -> bool {
249 self.t.get(token as usize).map(|t| t.1.is_none()).unwrap_or(false)
250 }
251
252 fn is_symbol_t_data(&self, symbol: &Symbol) -> bool {
253 if let Symbol::T(token) = symbol {
254 self.t.get(*token as usize).map(|t| t.1.is_none()).unwrap_or(false)
255 } else {
256 false
257 }
258 }
259
260 fn is_symbol_t_fixed(&self, symbol: &Symbol) -> bool {
261 if let Symbol::T(token) = symbol {
262 if (*token as usize) < self.t.len() {
263 self.t.get(*token as usize).map(|t| t.1.is_some()).unwrap_or(false)
264 } else {
265 false
266 }
267 } else {
268 false
269 }
270 }
271
272 fn get_t_str(&self, token: TokenId) -> String {
273 match token {
274 _ if (token as usize) < self.t.len() => {
275 let (name, literal) = &self.t[token as usize];
276 literal.as_ref().unwrap_or(name).clone()
277 }
278 TokenId::MAX => "<bad character>".to_string(),
279 _ => format!("T({token}?)")
280 }
281 }
282
283 fn get_t_name(&self, token: TokenId) -> String {
284 if token as usize >= self.t.len() {
285 format!("T({token}?)")
286 } else {
287 self.t[token as usize].0.clone()
288 }
289 }
290
291 fn get_nt_name(&self, var: VarId) -> String {
292 if var as usize >= self.nt.len() { return format!("NT({var}?)") }
293 self.nt[var as usize].clone()
294 }
295
296 fn get_name(&self, symbol: &Symbol) -> String {
297 match symbol {
298 Symbol::Empty | Symbol::End => symbol.to_string(),
299 Symbol::T(token) => self.get_t_name(*token),
300 Symbol::NT(var) => self.get_nt_name(*var),
301 }
302 }
303
304 fn get_str(&self, symbol: &Symbol) -> String {
305 match symbol {
306 Symbol::Empty | Symbol::End => symbol.to_string(),
307 Symbol::T(token) => self.get_t_str(*token),
308 Symbol::NT(var) => self.get_nt_name(*var),
309 }
310 }
311
312 fn get_name_quote(&self, symbol: &Symbol) -> String {
313 match symbol {
314 Symbol::Empty | Symbol::End => symbol.to_string(),
315 Symbol::T(token) => if self.is_symbol_t_fixed(symbol) { format!("{:?}", self.get_t_str(*token)) } else { self.get_t_str(*token) },
316 Symbol::NT(var) => self.get_nt_name(*var),
317 }
318 }
319}
320
321impl Default for SymbolTable {
322 fn default() -> Self {
323 SymbolTable::new()
324 }
325}
326
327#[cfg(test)]
328mod tests {
329 use super::*;
330
331 #[test]
332 fn general() {
333 let mut st = SymbolTable::new();
334 st.extend_nonterminals(["A".to_string(), "NOT_USED".to_string(), "E".to_string()]);
335 assert_eq!(st.add_child_nonterminal(0), 3);
336 assert_eq!(st.get_str(&Symbol::NT(3)), "A_1");
337 assert_eq!(st.add_child_nonterminal(0), 4);
338 assert_eq!(st.get_str(&Symbol::NT(4)), "A_2");
339 assert_eq!(st.add_child_nonterminal(2), 5);
340 assert_eq!(st.get_str(&Symbol::NT(5)), "E_1");
341 assert_eq!(st.add_child_nonterminal(1), 6);
342 st.remove_nonterminal(1);
343 assert_eq!(st.get_str(&Symbol::NT(2)), "A_1");
344 assert_eq!(st.get_str(&Symbol::NT(3)), "A_2");
345 assert_eq!(st.get_str(&Symbol::NT(4)), "E_1");
346 #[cfg(any())]
347 assert_eq!(st.find_nonterminal("A_1"), Some(2));
348 assert!(st.nt.contains(&"A_2".to_string()));
349 #[cfg(any())]
350 assert!(st.names.contains_key("A_2"));
351 assert!(st.fixer_nt.contains("A_2"));
352 st.remove_nt_name(3);
353 assert!(!st.nt.contains(&"A_2".to_string()));
354 #[cfg(any())]
355 assert!(!st.names.contains_key("A_2"));
356 assert!(!st.fixer_nt.contains("A_2"));
357 }
358
359 #[test]
360 fn terminals() {
361 let mut st = SymbolTable::new();
362 let tnames = vec![("a", Some("aa")), ("b", Some("bb")), ("a", Some("A")), ("c", None), ("d", None)];
363 st.extend_terminals(tnames);
364 assert_eq!(st.get_num_t(), 5);
365 let result = st.get_terminals().map(|(s, v)| (s.as_str(), v.as_ref().map(|s| s.as_str()))).to_vec();
366 assert_eq!(result, vec![("a", Some("aa")), ("b", Some("bb")), ("a1", Some("A")), ("c", None), ("d", None)]);
367 for name in vec!["a1", "c", "d"] {
368 assert_eq!(st.fixer_t.contains(name), true);
369 }
370 st.downsize_num_t(2);
371 let result = st.get_terminals().map(|(s, v)| (s.as_str(), v.as_ref().map(|s| s.as_str()))).to_vec();
372 assert_eq!(result, vec![("a", Some("aa")), ("b", Some("bb"))]);
373 assert_eq!(st.get_num_t(), 2);
374 for name in vec!["a1", "c", "d"] {
375 assert_eq!(st.fixer_t.contains(name), false);
376 }
377 let extra_tnames = vec![("a", Some("A")), ("c", None), ("d", None)];
378 st.extend_terminals(extra_tnames);
379 let result = st.get_terminals().map(|(s, v)| (s.as_str(), v.as_ref().map(|s| s.as_str()))).to_vec();
380 assert_eq!(result, vec![("a", Some("aa")), ("b", Some("bb")), ("a1", Some("A")), ("c", None), ("d", None)]);
381 }
382}