1#[cfg(any())]
4use std::collections::HashMap;
5use crate::{NameFixer, indent_source};
6use crate::fixed_sym_table::{FixedSymTable, SymInfoTable};
7use crate::{TokenId, VarId};
8use crate::parser::Symbol;
9#[cfg(test)]
10use lexigram_core::CollectJoin;
11
12#[derive(Clone, Debug)]
59pub struct SymbolTable {
60 t: Vec<(String, Option<String>)>, fixer_t: NameFixer, nt: Vec<String>, #[cfg(any())]
64 names: HashMap<String, VarId>, fixer_nt: NameFixer, }
67
68impl SymbolTable {
69 pub fn new() -> Self {
70 SymbolTable {
71 t: Vec::new(),
72 fixer_t: NameFixer::new_empty(),
73 nt: Vec::new(),
74 #[cfg(any())]
75 names: HashMap::new(),
76 fixer_nt: NameFixer::new_empty(),
77 }
78 }
79
80 pub fn to_fixed_sym_table(self) -> FixedSymTable {
81 FixedSymTable::new(self.t, self.nt)
82 }
83
84 pub fn add_terminal<T: Into<String>>(&mut self, name: T, value_maybe: Option<T>) -> TokenId {
87 let token = self.t.len();
88 assert!(token < TokenId::MAX as usize);
89 let unique_name = self.fixer_t.get_unique_name(name.into());
90 self.t.push((unique_name, value_maybe.map(|n| n.into())));
91 token as TokenId
92 }
93
94 pub fn extend_terminals<I: IntoIterator<Item=(T, Option<T>)>, T: Into<String>>(&mut self, terminals: I) {
95 for (s, maybe) in terminals {
96 self.add_terminal(s, maybe);
97 }
98 }
99
100 pub fn get_terminals(&self) -> impl Iterator<Item = &(String, Option<String>)> {
101 self.t.iter()
102 }
103
104 pub fn get_num_t(&self) -> usize {
105 self.t.len()
106 }
107
108 pub fn set_t_value(&mut self, token: TokenId, name_maybe: Option<String>) {
109 self.t[token as usize].1 = name_maybe
110 }
111
112 pub fn downsize_num_t(&mut self, num_t: usize) {
113 if num_t < self.t.len() {
114 for (name, _) in &mut self.t[num_t..] {
115 self.fixer_t.remove(name);
116 }
117 self.t.resize(num_t, (String::new(), None));
118 }
119 }
120
121 fn add_nt(&mut self, unique_name: String) -> VarId {
124 let var = self.nt.len();
125 assert!(var < VarId::MAX as usize);
126 #[cfg(any())]
127 self.names.insert(unique_name.clone(), var as VarId);
128 self.nt.push(unique_name);
129 var as VarId
130 }
131
132 pub fn add_nonterminal<T: Into<String>>(&mut self, name: T) -> VarId {
133 let unique_name = self.fixer_nt.get_unique_name(name.into());
134 self.add_nt(unique_name)
135 }
136
137 pub fn add_child_nonterminal(&mut self, var: VarId) -> VarId {
138 let unique_name = self.fixer_nt.get_unique_name_unum(self.nt[var as usize].clone());
139 self.add_nt(unique_name)
140 }
141
142 pub fn extend_nonterminals<I: IntoIterator<Item=T>, T: Into<String>>(&mut self, nonterminals: I) {
143 for s in nonterminals {
144 self.add_nonterminal(s);
145 }
146 }
147
148 #[cfg(any())]
149 pub fn find_nonterminal(&self, name: &str) -> Option<VarId> {
150 self.names.get(name).cloned()
151 }
152
153 pub fn get_nonterminals(&self) -> impl Iterator<Item = &String> {
154 self.nt.iter()
155 }
156
157 pub fn get_num_nt(&self) -> usize {
158 self.nt.len()
159 }
160
161 pub fn remove_nonterminal(&mut self, v: VarId) {
162 let name = self.nt.remove(v as usize);
163 #[cfg(any())]
164 {
165 for old_v in self.names.values_mut() {
166 if *old_v >= v { *old_v -= 1; }
167 }
168 self.names.remove(&name);
169 }
170 self.fixer_nt.remove(&name);
171 }
172
173 pub fn set_nt_name(&mut self, var: VarId, name: String) {
174 self.nt[var as usize] = name;
175 }
176
177 pub fn find_nt(&self, name: &str) -> Option<VarId> {
178 self.nt.iter().position(|s| s == name).map(|n| n as VarId)
179 }
180
181 pub fn remove_nt_name(&mut self, var: VarId) -> String {
184 let mut removed = self.fixer_nt.get_unique_name_num(format!("{var}_removed"));
185 std::mem::swap(&mut self.nt[var as usize], &mut removed);
186 #[cfg(any())]
187 self.names.remove(&removed);
188 self.fixer_nt.remove(&removed);
189 removed
190 }
191
192 pub fn gen_source_code_t(&self, indent: usize, has_enum: bool, has_array: bool) -> String {
193 let mut source = Vec::<String>::new();
194 let (max_n, max_option) = self.get_terminals().fold((0, 0), |(n, option), t| {
195 (n.max(t.0.len()), option.max(t.1.as_ref().map_or(0, |o| o.len())))
196 });
197 if has_enum {
198 source.push("#[repr(u16)]".to_string());
201 source.push("enum T {".to_string());
202 for (i, (n, _option)) in self.get_terminals().enumerate() {
203 source.push(format!(" {n:max_n$}{} // {i}", if i == 0 { " = 0," } else { ", " }));
204 }
205 source.push("}".to_string());
206 }
207 if has_enum && has_array {
208 source.push(String::new());
209 }
210 if has_array {
211 source.push(format!("static TERMINALS: [(&str, Option<&str>); {}] = [", self.get_num_t()));
214 for (i, (n, option)) in self.get_terminals().enumerate() {
215 source.push(format!(" (\"{n}\",{:w1$}{}),{:w2$} // {i}",
216 "",
217 if let Some(o) = option { format!("Some(\"{o}\")") } else { "None".to_string() },
218 "",
219 w1 = max_n - n.len(), w2 = max_option + 4 - option.as_ref().map_or(0, |o| o.len() + 4)));
220 }
221 source.push("];".to_string());
222 }
223 indent_source(vec![source], indent)
224 }
225
226 #[cfg(test)]
229 pub fn dump(&self, title: &str) {
230 if !title.is_empty() {
231 println!("{title}");
232 }
233 println!(
234 "- nonterminals:\n{}",
235 self.get_nonterminals().enumerate().map(|(v, s)| format!(" - NT[{v}]: {s}")).join("\n"));
236 println!(
237 "- terminals:\n{}",
238 self.get_terminals().enumerate()
239 .map(|(t, (n, v_maybe))| format!(" - T[{t}]: {n}{}", if let Some(v) = v_maybe { format!(" = {v:?}") } else { String::new() }))
240 .join("\n"));
241
242 }
243}
244
245impl SymInfoTable for SymbolTable {
246 fn is_token_data(&self, token: TokenId) -> bool {
247 self.t.get(token as usize).map(|t| t.1.is_none()).unwrap_or(false)
248 }
249
250 fn is_symbol_t_data(&self, symbol: &Symbol) -> bool {
251 if let Symbol::T(token) = symbol {
252 self.t.get(*token as usize).map(|t| t.1.is_none()).unwrap_or(false)
253 } else {
254 false
255 }
256 }
257
258 fn is_symbol_t_fixed(&self, symbol: &Symbol) -> bool {
259 if let Symbol::T(token) = symbol {
260 if (*token as usize) < self.t.len() {
261 self.t.get(*token as usize).map(|t| t.1.is_some()).unwrap_or(false)
262 } else {
263 false
264 }
265 } else {
266 false
267 }
268 }
269
270 fn get_t_str(&self, token: TokenId) -> String {
271 match token {
272 _ if (token as usize) < self.t.len() => {
273 let (name, literal) = &self.t[token as usize];
274 literal.as_ref().unwrap_or(name).clone()
275 }
276 TokenId::MAX => "<bad character>".to_string(),
277 _ => format!("T({token}?)")
278 }
279 }
280
281 fn get_t_name(&self, token: TokenId) -> String {
282 if token as usize >= self.t.len() {
283 format!("T({token}?)")
284 } else {
285 self.t[token as usize].0.clone()
286 }
287 }
288
289 fn get_nt_name(&self, var: VarId) -> String {
290 if var as usize >= self.nt.len() { return format!("NT({var}?)") }
291 self.nt[var as usize].clone()
292 }
293
294 fn get_name(&self, symbol: &Symbol) -> String {
295 match symbol {
296 Symbol::Empty | Symbol::End => symbol.to_string(),
297 Symbol::T(token) => self.get_t_name(*token),
298 Symbol::NT(var) => self.get_nt_name(*var),
299 }
300 }
301
302 fn get_str(&self, symbol: &Symbol) -> String {
303 match symbol {
304 Symbol::Empty | Symbol::End => symbol.to_string(),
305 Symbol::T(token) => self.get_t_str(*token),
306 Symbol::NT(var) => self.get_nt_name(*var),
307 }
308 }
309
310 fn get_name_quote(&self, symbol: &Symbol) -> String {
311 match symbol {
312 Symbol::Empty | Symbol::End => symbol.to_string(),
313 Symbol::T(token) => if self.is_symbol_t_fixed(symbol) { format!("{:?}", self.get_t_str(*token)) } else { self.get_t_str(*token) },
314 Symbol::NT(var) => self.get_nt_name(*var),
315 }
316 }
317}
318
319impl Default for SymbolTable {
320 fn default() -> Self {
321 SymbolTable::new()
322 }
323}
324
325#[cfg(test)]
326mod tests {
327 use super::*;
328
329 #[test]
330 fn general() {
331 let mut st = SymbolTable::new();
332 st.extend_nonterminals(["A".to_string(), "NOT_USED".to_string(), "E".to_string()]);
333 assert_eq!(st.add_child_nonterminal(0), 3);
334 assert_eq!(st.get_str(&Symbol::NT(3)), "A_1");
335 assert_eq!(st.add_child_nonterminal(0), 4);
336 assert_eq!(st.get_str(&Symbol::NT(4)), "A_2");
337 assert_eq!(st.add_child_nonterminal(2), 5);
338 assert_eq!(st.get_str(&Symbol::NT(5)), "E_1");
339 assert_eq!(st.add_child_nonterminal(1), 6);
340 st.remove_nonterminal(1);
341 assert_eq!(st.get_str(&Symbol::NT(2)), "A_1");
342 assert_eq!(st.get_str(&Symbol::NT(3)), "A_2");
343 assert_eq!(st.get_str(&Symbol::NT(4)), "E_1");
344 #[cfg(any())]
345 assert_eq!(st.find_nonterminal("A_1"), Some(2));
346 assert!(st.nt.contains(&"A_2".to_string()));
347 #[cfg(any())]
348 assert!(st.names.contains_key("A_2"));
349 assert!(st.fixer_nt.contains("A_2"));
350 st.remove_nt_name(3);
351 assert!(!st.nt.contains(&"A_2".to_string()));
352 #[cfg(any())]
353 assert!(!st.names.contains_key("A_2"));
354 assert!(!st.fixer_nt.contains("A_2"));
355 }
356
357 #[test]
358 fn terminals() {
359 let mut st = SymbolTable::new();
360 let tnames = vec![("a", Some("aa")), ("b", Some("bb")), ("a", Some("A")), ("c", None), ("d", None)];
361 st.extend_terminals(tnames);
362 assert_eq!(st.get_num_t(), 5);
363 let result = st.get_terminals().map(|(s, v)| (s.as_str(), v.as_ref().map(|s| s.as_str()))).to_vec();
364 assert_eq!(result, vec![("a", Some("aa")), ("b", Some("bb")), ("a1", Some("A")), ("c", None), ("d", None)]);
365 for name in vec!["a1", "c", "d"] {
366 assert_eq!(st.fixer_t.contains(name), true);
367 }
368 st.downsize_num_t(2);
369 let result = st.get_terminals().map(|(s, v)| (s.as_str(), v.as_ref().map(|s| s.as_str()))).to_vec();
370 assert_eq!(result, vec![("a", Some("aa")), ("b", Some("bb"))]);
371 assert_eq!(st.get_num_t(), 2);
372 for name in vec!["a1", "c", "d"] {
373 assert_eq!(st.fixer_t.contains(name), false);
374 }
375 let extra_tnames = vec![("a", Some("A")), ("c", None), ("d", None)];
376 st.extend_terminals(extra_tnames);
377 let result = st.get_terminals().map(|(s, v)| (s.as_str(), v.as_ref().map(|s| s.as_str()))).to_vec();
378 assert_eq!(result, vec![("a", Some("aa")), ("b", Some("bb")), ("a1", Some("A")), ("c", None), ("d", None)]);
379 }
380}