1use super::*;
4use crate::{columns_to_str, AltId, SymbolTable, VarId};
5use lexigram_core::alt::{alt_to_rule_str, Alternative};
6use lexigram_core::CollectJoin;
7
8pub type ProdRule = Vec<Alternative>;
20
21pub fn prule_to_str(prule: &ProdRule, symbol_table: Option<&SymbolTable>) -> String {
22 prule.iter().map(|alt| alt.to_str(symbol_table)).join(" | ")
23}
24
25pub fn prule_to_rule_str(nt: VarId, prule: &ProdRule, symbol_table: Option<&SymbolTable>) -> String {
26 format!("{} -> {}", Symbol::NT(nt).to_str(symbol_table), prule.iter().map(|alt| alt.to_str(symbol_table)).join(" | "))
27
28}
29
30pub fn prule_to_macro(prule: &ProdRule) -> String {
31 format!("prule!({})", prule.iter().map(|alt| alt.to_macro_item()).join("; "))
32}
33
34#[derive(Clone, Copy, PartialEq, Debug)]
35enum AltType { Independant, LeftAssoc, Prefix, RightAssoc, Suffix }
36
37impl AltType {
38 fn from(nt: &Symbol, alt: &Alternative) -> Self {
39 let left = matches!(alt.first(), Some(var) if var == nt);
40 let right = alt.len() > 1 && matches!(alt.last(), Some(var) if var == nt);
41 match (left, right) {
42 (false, false) => AltType::Independant,
43 (false, true) => AltType::Prefix,
44 (true, false) => AltType::Suffix,
45 (true, true) => if alt.flags & ruleflag::R_ASSOC != 0 { AltType::RightAssoc } else { AltType::LeftAssoc },
46 }
47 }
48}
49
50#[derive(Debug, Clone)]
51struct AltInfo {
52 #[allow(unused)]
53 pred_priority: Option<AltId>,
54 ivar: usize,
55 ty: AltType
56}
57
58#[derive(Debug)]
59pub struct LLParsingTable {
60 pub num_nt: usize,
61 pub num_t: usize,
62 pub alts: Vec<(VarId, Alternative)>,
63 pub table: Vec<AltId>,
64 pub flags: Vec<u32>, pub parent: Vec<Option<VarId>>, }
67
68impl LLParsingTable {
69 pub fn new() -> Self {
70 LLParsingTable { num_nt: 0, num_t: 0, alts: vec![], table: vec![], flags: vec![], parent: vec![] }
71 }
72
73 pub fn get_top_parent(&self, nt: VarId) -> VarId {
74 let mut var = nt;
75 while let Some(parent) = self.parent[var as usize] {
76 var = parent;
77 }
78 var
79 }
80}
81
82#[derive(Clone, Debug)]
83pub struct ProdRuleSet<T> {
84 pub(super) prules: Vec<ProdRule>,
85 pub(crate) origin: Origin<VarId, FromPRS>,
86 pub(super) num_nt: usize,
87 pub(super) num_t: usize,
88 pub(crate) symbol_table: Option<SymbolTable>,
89 pub(super) flags: Vec<u32>,
90 pub(super) parent: Vec<Option<VarId>>,
91 pub(super) start: Option<VarId>,
92 pub(crate) name: Option<String>,
93 pub(crate) nt_conversion: HashMap<VarId, NTConversion>,
94 pub(crate) log: BufLog,
95 pub(super) _phantom: PhantomData<T>
96}
97
98impl<T> ProdRuleSet<T> {
99 pub fn get_start(&self) -> Option<VarId> {
101 self.start
102 }
103
104 pub fn set_start(&mut self, start: VarId) {
106 self.start = Some(start);
107 }
108
109 pub fn get_name(&self) -> Option<&String> {
110 self.name.as_ref()
111 }
112
113 pub fn set_name(&mut self, name: Option<String>) {
114 self.name = name;
115 }
116
117 pub fn get_next_available_var(&self) -> VarId {
119 self.prules.len() as VarId }
121
122 pub fn get_prules_iter(&self) -> impl Iterator<Item=(VarId, &ProdRule)> {
124 self.prules.iter().index().filter_map(|(id, p)| if p.is_empty() { None } else { Some((id, p)) })
125 }
126
127 pub fn get_prules_iter_mut(&mut self) -> impl Iterator<Item=(VarId, &mut ProdRule)> {
128 self.prules.iter_mut().enumerate().filter_map(|(id, p)| if p.is_empty() { None } else { Some((id as VarId, p)) })
129 }
130
131 pub fn get_alts(&self) -> impl Iterator<Item=(VarId, &Alternative)> {
132 self.prules.iter().enumerate()
133 .flat_map(|(v, p)| p.iter().map(move |alt| (v as VarId, alt)))
134 }
135
136 pub fn set_symbol_table(&mut self, symbol_table: SymbolTable) {
137 self.symbol_table = Some(symbol_table);
138 }
139
140 pub fn get_symbol_table(&self) -> Option<&SymbolTable> {
141 self.symbol_table.as_ref()
142 }
143
144 pub fn get_num_nt(&self) -> usize {
145 self.num_nt
146 }
147
148 pub fn get_num_t(&self) -> usize {
149 self.num_t
150 }
151
152 pub fn set_flags(&mut self, nt: VarId, new_flags: u32) {
155 let nt = nt as usize;
156 if nt >= self.flags.len() {
157 self.flags.resize(nt + 1, 0);
158 }
159 self.flags[nt] |= new_flags;
160 }
161
162 pub fn get_flags(&self, nt: VarId) -> u32 {
163 let nt = nt as usize;
164 if nt < self.flags.len() {
165 self.flags[nt]
166 } else {
167 0
168 }
169 }
170
171 pub(super) fn set_parent(&mut self, child: VarId, parent: VarId) {
172 let child = child as usize;
173 if child >= self.parent.len() {
174 self.parent.resize(child + 1, None);
175 }
176 self.parent[child] = Some(parent);
177 }
178
179 #[allow(unused)]
180 #[cfg(test)] pub(super) fn get_parent(&self, child: VarId) -> Option<VarId> {
182 let child = child as usize;
183 if child >= self.parent.len() {
184 None
185 } else {
186 self.parent[child]
187 }
188 }
189
190 fn get_top_parent(&self, nt: VarId) -> VarId {
191 let mut var = nt;
192 while let Some(parent) = self.parent[var as usize] {
193 var = parent;
194 }
195 var
196 }
197
198 pub(super) fn calc_num_symbols(&mut self) {
205 self.num_nt = self.prules.len();
206 self.num_t = self.prules.iter().map(|p|
207 p.iter().map(|f|
208 f.iter().filter_map(|s|
209 if let Symbol::T(v) = s { Some(*v + 1) } else { None }
210 ).max().unwrap_or(0)
211 ).max().unwrap_or(0)
212 ).max().unwrap_or(0) as usize;
213 self.symbol_table.as_mut().map(|st| st.downsize_num_t(self.num_t));
214 self.flags.resize(self.num_nt, 0);
215 self.parent.resize(self.num_nt, None);
216 }
217
218 pub(super) fn simplify(&mut self) {
220 for p in &mut self.prules {
221 let mut has_empty = false;
222 let mut i = 0;
223 while i < p.len() {
224 let alt = p.get_mut(i).unwrap();
225 let mut j = 0;
226 while j < alt.len() {
227 if alt[j].is_empty() && (j > 0 || j + 1 < alt.len()) {
228 alt.v.remove(j);
229 } else {
230 j += 1;
231 }
232 }
233 if alt.len() == 1 && alt[0].is_empty() {
234 if has_empty {
235 p.remove(i);
236 } else {
237 has_empty = true;
238 i += 1;
239 }
240 } else {
241 i += 1;
242 }
243 }
244 }
245 }
246
247 fn check_num_nt_coherency(&mut self) {
248 if let Some(n) = self.symbol_table.as_ref().and_then(|table| Some(table.get_num_nt())) {
249 let num_nt = self.prules.len();
250 if n != num_nt {
251 self.log.add_error(format!("there are {num_nt} rules but the symbol table has {n} nonterminal symbols: dropping the table"));
252 self.symbol_table = None;
253 }
254 }
255 }
256
257 fn cleanup_symbols(&mut self, keep: &mut HashSet<Symbol>) {
262 const VERBOSE: bool = false;
263 let mut nt_content = (0..self.num_nt).map(|v| if self.parent[v].is_none() { Some(v as VarId) } else { None }).to_vec();
264 for (from, cnv) in self.nt_conversion.iter() {
265 let MovedTo(to) = cnv else { panic!("{cnv:?} unexpected") };
267 nt_content[*from as usize] = None;
268 nt_content[*to as usize] = Some(*from);
269 }
270 if VERBOSE {
271 println!("Removing unused nonterminals:");
272 let mut all_h = self.prules.iter().flat_map(|p| p.iter().map(|x| &x.v).flatten()).cloned().collect::<HashSet<_>>();
273 all_h.extend((0..self.num_nt).map(|i| Symbol::NT(i as VarId)));
274 let mut all = all_h.into_iter().collect::<Vec<_>>();
275 all.sort();
276 println!(" current NT symbols: {}", all.iter().filter_map(|s|
277 if let Symbol::NT(_) = s { Some(format!("{}", s.to_str(self.get_symbol_table()))) } else { None }).join(", "));
278 println!(" current T symbols: {}", all.iter().filter_map(|s|
279 if let Symbol::T(_) = s { Some(s.to_str(self.get_symbol_table())) } else { None }).join(" "));
280 let mut used = keep.iter().collect::<Vec<_>>();
281 used.sort();
282 println!(" used NT symbols: {}", used.iter().filter_map(|s| if let Symbol::NT(v) = s { Some((v, s)) } else { None })
283 .enumerate().map(|(new_id, (id, s))| format!("{}({id} -> {new_id})", s.to_str(self.get_symbol_table()))).join(", "));
284 println!(" used T symbols: {}", used.iter().filter_map(|s|
285 if let Symbol::T(_) = s { Some(s.to_str(self.get_symbol_table())) } else { None }).join(" "));
286 println!(" nt_conversion: {:?}", self.nt_conversion);
287 println!(" nt_content: {}", nt_content.iter().enumerate()
288 .filter_map(|(v, f)| if let Some(from) = f { Some(format!("{v}:{from}")) } else { None } )
289 .join(", ")
290 );
291 }
292 self.nt_conversion.clear();
293 let new_num_nt = keep.iter().filter(|s| matches!(s, Symbol::NT(_))).count() as VarId;
294 let mut new_v = new_num_nt;
295 let mut conv = HashMap::<VarId, VarId>::new();
296 for i in (0..self.num_nt).rev() {
297 let v = i as VarId;
298 let symbol = Symbol::NT(v);
299 if !keep.contains(&symbol) {
300 if VERBOSE { println!("- deleting {}", symbol.to_str(self.get_symbol_table())); }
301 if self.parent[i].is_none() {
302 self.nt_conversion.insert(v, Removed);
303 }
304 nt_content.remove(i);
305 self.prules.remove(i);
306 self.start = self.start.map(|s| if s >= v { s - 1 } else { s });
307 self.symbol_table.as_mut().map(|t| t.remove_nonterminal(v));
308 self.flags.remove(i);
309 self.parent.remove(i);
310 if self.origin.trees.len() > i {
311 self.origin.trees.remove(i);
312 }
313
314 } else {
315 new_v -= 1;
316 conv.insert(v, new_v);
317 if VERBOSE { println!("- {symbol:?} -> {:?}", Symbol::NT(new_v)); }
318 }
319 }
320 for p in &mut self.prules {
321 for f in p {
322 for s in &mut f.v {
323 if let Symbol::NT(s_var) = s {
324 if let Some(new) = conv.get(s_var) {
325 *s = Symbol::NT(*new);
326 }
327 }
328 }
329 if let Some((ref mut var, _id)) = f.origin {
330 if let Some(new_var) = conv.get(&var) {
331 *var = *new_var;
332 }
333 }
334 }
335 }
336 for p in &mut self.parent {
337 if let Some(parent) = p {
338 if let Some(new) = conv.get(parent) {
339 *p = Some(*new);
340 }
341 }
342 }
343 for t in &mut self.origin.trees {
344 for mut node in t.iter_depth_simple_mut() {
345 match *node {
346 GrNode::Symbol(Symbol::NT(ref mut v)) => {
347 if let Some(new_v) = conv.get(&v) {
348 *v = *new_v;
349 }
350 }
351 _ => {}
352 }
353 }
354 }
355 let new_map = self.origin.map.iter()
356 .map(|(v1, (v2, id))| (*conv.get(&v1).unwrap_or(&v1), (*conv.get(&v2).unwrap_or(&v2), *id)))
357 .collect::<HashMap<_, _>>();
358 self.origin.map = new_map;
362 keep.retain(|s| !matches!(s, Symbol::NT(_)));
363 keep.extend((0..new_num_nt as VarId).map(|v| Symbol::NT(v)));
364 self.num_nt = new_num_nt as usize;
365 if VERBOSE {
366 println!("-> nt_content: {}", nt_content.iter().enumerate()
367 .filter_map(|(v, f)| if let Some(from) = f { Some(format!("{v}:{from}")) } else { None })
368 .join(", ")
369 );
370 }
371 for (to, f) in nt_content.into_iter().index() {
372 if let Some(from) = f {
373 if from != to {
374 self.nt_conversion.insert(from, MovedTo(to as VarId));
375 } else {
376 if self.nt_conversion.get(&from) == Some(&Removed) {
377 self.nt_conversion.remove(&from);
378 }
379 }
380 }
381 };
382 if VERBOSE { println!("-> nt_conversion: {:?}", self.nt_conversion); }
383 }
384
385 pub fn calc_first(&mut self) -> HashMap<Symbol, HashSet<Symbol>> {
386 const VERBOSE: bool = false;
387 if self.start.is_none() {
388 self.log.add_error("calc_first: start NT symbol not defined");
389 }
390 if self.num_nt == 0 {
391 self.log.add_error("calc_first: no nonterminal in grammar".to_string());
392 }
393 if !self.log.has_no_errors() {
394 return HashMap::new();
395 }
396 let mut symbols = HashSet::<Symbol>::new();
397 let mut stack = vec![Symbol::NT(self.start.unwrap())];
398 while let Some(sym) = stack.pop() {
399 if !symbols.contains(&sym) {
400 symbols.insert(sym);
401 if let Symbol::NT(v) = sym {
402 stack.extend(self.prules[v as usize].iter().map(|x| &x.v).flatten());
403 }
404 }
405 }
406 if symbols.iter().filter(|s| matches!(s, Symbol::NT(_))).count() != self.num_nt {
407 let nt_removed = (0..self.num_nt as VarId)
408 .filter(|v| !symbols.contains(&Symbol::NT(*v)) && !matches!(self.nt_conversion.get(v), Some(MovedTo(_))))
410 .map(|v| Symbol::NT(v))
411 .to_vec();
412 if !nt_removed.is_empty() {
413 self.log.add_warning(format!("calc_first: unused nonterminals: {}",
414 nt_removed.into_iter().map(|s| format!("{:?} = {}", s, s.to_str(self.get_symbol_table()))).join(", ")));
415 }
416 self.cleanup_symbols(&mut symbols);
417 }
418
419 let unused_t = (0..self.num_t)
420 .filter_map(|t_id| {
421 let s = Symbol::T(t_id as VarId);
422 if !symbols.contains(&s) { Some(format!("T({t_id}) = {}", s.to_str(self.get_symbol_table()))) } else { None }
423 }).to_vec();
424 if unused_t.len() > 0 {
425 self.log.add_warning(format!("calc_first: unused terminals: {}", unused_t.join(", ")))
426 }
427
428 let mut first = symbols.into_iter().map(|sym| {
429 match &sym {
430 Symbol::T(_) | Symbol::Empty => (sym, hashset![sym]),
431 Symbol::NT(_) => (sym, HashSet::new()),
432 Symbol::End => panic!("found reserved symbol {sym:?} in production rules"),
433 }
434 }).collect::<HashMap<_, _>>();
435 let mut change = true;
436 let rules = (0..self.num_nt as VarId).filter(|var| first.contains_key(&Symbol::NT(*var))).to_vec();
437 if VERBOSE { println!("rules: {}", rules.iter().map(|v| Symbol::NT(*v).to_str(self.symbol_table.as_ref())).join(", ")); }
438 while change {
439 change = false;
440 for i in &rules {
441 let prule = &self.prules[*i as usize];
442 let symbol = Symbol::NT(*i as VarId);
443 if VERBOSE { println!("- {} -> {}", symbol.to_str(self.symbol_table.as_ref()), prule_to_str(prule, self.symbol_table.as_ref())); }
444 let num_items = first[&symbol].len();
445 for alt in prule {
446 if VERBOSE { println!(" - {}", alt.to_str(self.symbol_table.as_ref())); }
447 assert!(alt.len() > 0, "empty alternative for {}: {}",
448 symbol.to_str(self.symbol_table.as_ref()), alt.to_str(self.symbol_table.as_ref()));
449 if VERBOSE {
450 print!(" [0] {}", alt[0].to_str(self.symbol_table.as_ref()));
451 println!(", first = {}", first[&alt[0]].iter().map(|s| s.to_str(self.symbol_table.as_ref())).join(", "));
452 }
453 let new = alt.calc_alt_first(&first);
454 let _n = first.get(&symbol).unwrap().len();
455 first.get_mut(&symbol).unwrap().extend(new);
456 if VERBOSE {
457 if first.get(&symbol).unwrap().len() > _n {
458 println!(" first[{}] -> {}", symbol.to_str(self.get_symbol_table()),
459 first.get(&symbol).unwrap().iter().map(|s| s.to_str(self.get_symbol_table())).join(", "));
460 }
461 }
462 }
463 change |= first[&symbol].len() > num_items;
464 }
465 if VERBOSE && change { println!("---------------------------- again"); }
466 }
467 if self.num_t == 0 {
468 self.log.add_error("calc_first: no terminal in grammar".to_string());
469 }
470 first
471 }
472
473 pub fn calc_follow(&self, first: &HashMap<Symbol, HashSet<Symbol>>) -> HashMap<Symbol, HashSet<Symbol>> {
474 const VERBOSE: bool = false;
475 assert!(self.start.is_some(), "start NT symbol not defined");
476 if !self.log.has_no_errors() {
477 return HashMap::new();
478 }
479 let mut follow = first.iter()
480 .filter_map(|(s, _)| if matches!(s, Symbol::NT(_)) { Some((*s, HashSet::<Symbol>::new())) } else { None })
481 .collect::<HashMap<_, _>>();
482 follow.get_mut(&Symbol::NT(self.start.unwrap())).unwrap().insert(Symbol::End);
483 let rules = (0..self.num_nt as VarId).filter(|var| follow.contains_key(&Symbol::NT(*var))).to_vec();
484 let mut change = true;
485 while change {
486 change = false;
487 for i in &rules {
488 let prule = &self.prules[*i as usize];
489 let symbol = Symbol::NT(*i as VarId);
490 if VERBOSE { println!("- {} -> {}", symbol.to_str(self.symbol_table.as_ref()), prule_to_str(prule, self.symbol_table.as_ref())); }
491 for alt in prule {
492 if VERBOSE { println!(" - {}", alt.to_str(self.symbol_table.as_ref())); }
493 let mut trail = follow.get(&symbol).unwrap().clone();
494 for sym_i in alt.iter().rev() {
495 if let Symbol::NT(_) = sym_i {
496 let num_items = follow.get(sym_i).unwrap().len();
497 follow.get_mut(sym_i).unwrap().extend(&trail);
498 if VERBOSE {
499 if follow.get(sym_i).unwrap().len() > num_items {
500 println!(" follow[{}] -> {}", sym_i.to_str(self.get_symbol_table()),
501 follow.get(sym_i).unwrap().iter().map(|s| s.to_str(self.get_symbol_table())).join(", "));
502 }
503 }
504 change |= follow.get(sym_i).unwrap().len() > num_items;
505 if first[sym_i].contains(&Symbol::Empty) {
506 trail.extend(first[sym_i].iter().filter(|s| *s != &Symbol::Empty));
507 } else {
508 trail.clear();
509 trail.extend(&first[sym_i]);
510 }
511 } else {
512 trail.clear();
513 trail.insert(*sym_i);
514 }
515 }
516 }
517 }
518 if VERBOSE && change { println!("---------------------------- again"); }
519 }
520 follow
521 }
522
523 pub(super) fn remove_recursion(&mut self) {
537 const MAX_DISTRIB_LEN: Option<usize> = None; const DONT_DISTRIB_IN_AMBIG: bool = true; const VERBOSE: bool = false;
543
544 self.log.add_note("removing left / binary recursion in grammar...");
545 self.check_num_nt_coherency();
546 if VERBOSE {
547 println!("ORIGINAL:");
548 self.print_rules(false, false);
549 }
550 let mut var_new = self.get_next_available_var() as usize;
551 let mut prules = take(&mut self.prules);
553 let mut ambig_alt_id = 0;
554 for var in 0..var_new {
555 let prule = prules.get_mut(var).unwrap();
556 let var = var as VarId;
557 let symbol = Symbol::NT(var);
558 let var_name = symbol.to_str(self.get_symbol_table());
559 let mut extra_prods = Vec::<ProdRule>::new();
560 if prule.iter().any(|p| !p.is_empty() && (p.first().unwrap() == &symbol)) {
561 let orig_str = format!("{var_name} -> {}", prule_to_str(prule, self.get_symbol_table()));
562 self.log.add_note(format!("- modifying: {orig_str}"));
563 if VERBOSE { println!("processing: {orig_str}"); }
564
565 let (indep, mut alts) : (Vec<_>, Vec<_>) = take(prule).into_iter()
566 .partition(|alt| alt.first().unwrap() != &symbol && alt.last().unwrap() != &symbol);
567 alts.reverse();
568 let mut prec_eq = alts.iter_mut()
569 .map(|f| {
570 let p = f.flags & ruleflag::PREC_EQ != 0;
571 f.flags &= !ruleflag::PREC_EQ;
572 p
573 }).to_vec();
574 prec_eq.pop();
575 prec_eq.insert(0, false);
576 if indep.is_empty() {
577 self.log.add_error(format!("recursive rules must have at least one independent alternative: {} -> {}",
578 Symbol::NT(var).to_str(self.get_symbol_table()),
579 prule_to_str(prule, self.get_symbol_table())));
580 prule.extend(alts);
581 prule.extend(indep);
582 continue;
583 }
584
585 let mut var_i = 0; let mut rule_var_i = 0; let mut var_alts: Vec<Vec<AltId>> = vec![vec![]]; let mut indep_alts = Vec::<AltId>::new();
592 let mut pr_info = Vec::<AltInfo>::new(); let mut has_ambig = false;
594
595 for (i, alt) in alts.iter().index::<AltId>() {
596 let ty = AltType::from(&symbol, alt);
597 has_ambig |= ty == AltType::LeftAssoc || ty == AltType::RightAssoc;
598 var_i = match ty {
599 AltType::Independant => panic!("there can't be an independent alternative in `alts`"),
600 AltType::LeftAssoc => if prec_eq[i as usize] { var_i } else { var_i + 1 },
601 AltType::Prefix
602 | AltType::RightAssoc => if var_i > rule_var_i || prec_eq[i as usize] || var_alts[var_i].is_empty() { var_i } else { var_i + 1 },
603 AltType::Suffix => var_i,
604 };
605 let alt_info = AltInfo {
606 pred_priority: if ty == AltType::Prefix { None } else { Some(i) },
607 ivar: var_i,
608 ty
609 };
610 pr_info.push(alt_info);
611 let top_maybe = match ty {
612 AltType::Independant => panic!("there can't be an independent alternative in `alts`"),
613 AltType::LeftAssoc => Some(var_i - 1), AltType::Prefix => None, AltType::RightAssoc
616 | AltType::Suffix => Some(var_i), };
618 if let Some(top) = top_maybe {
619 rule_var_i = top;
620 var_alts.resize(1 + top, vec![]);
621 (0..=top).for_each(|v| var_alts[v].push(i));
622 } else {
623 indep_alts.push(i);
624 }
625 let var_alt_str = format!(
626 "[{i:2}] {:15}: {:10} => var_i = {var_i:2}, rule_var_i = {rule_var_i:2}, {{{}}}",
627 alt.to_str(self.get_symbol_table()),
628 format!("{ty:?}"), var_alts.iter().enumerate().map(|(i, va)| format!("{i}: {}", va.iter().join(","))).join(" "));
630 self.log.add_note(format!(" - {var_alt_str}"));
631 if VERBOSE { println!("- {var_alt_str}"); }
632 };
633 assert!(var_i <= rule_var_i + 1, "var_i = {var_i}, rule_var_i = {rule_var_i}");
634
635 let need_indep = indep.len() + indep_alts.len() > 1
639 && (MAX_DISTRIB_LEN.map(|max| indep.len() + indep_alts.len() > max).unwrap_or(false) || has_ambig || rule_var_i < var_i)
640 || DONT_DISTRIB_IN_AMBIG && has_ambig;
641 let num_indep = if need_indep { 1 } else { 0 };
642 let mut var_i_nt = Vec::<(VarId, VarId)>::with_capacity(var_i + 1);
643 var_i_nt.push((var, var_new as VarId));
644 var_i_nt.extend((0..var_i).map(|i| ((var_new + i*2 + 1) as VarId, (var_new + i*2 + 2) as VarId)));
645 var_new += rule_var_i * 2 + 1 + num_indep;
646 if VERBOSE { println!("adding {} variables (w/o independent alternatives), need_indep = {need_indep}, var_new: {} -> {var_new}",
647 rule_var_i * 2 + 1 + num_indep, var_new - (rule_var_i * 2 + 1 + num_indep)); }
648 let nt_indep_maybe = if need_indep { Some(var_new as VarId - 1) } else { None };
649 if var_new > VarId::MAX as usize {
650 self.log.add_error(format!("too many nonterminals when expanding {var_name}: {var_new} > {}", VarId::MAX));
651 return;
652 }
653
654 let new_alts = alts.iter().zip(&pr_info).map(|(a, AltInfo { ivar, ty, .. })| {
657 let mut new_a: Alternative;
658 match ty {
659 AltType::LeftAssoc | AltType::RightAssoc => {
660 new_a = Alternative::new(a.v[1..a.len() - 1].to_owned());
661 if need_indep || *ivar <= rule_var_i {
662 new_a.v.push(Symbol::NT(var_i_nt[*ivar].0));
663 } else {
664 new_a.v.extend(&indep[0].v);
665 }
666 if ty == &AltType::RightAssoc {
667 new_a.flags = ruleflag::R_ASSOC;
668 }
669 }
670 AltType::Suffix => {
671 new_a = Alternative::new(a.v[1..a.len()].to_owned());
672 }
673 AltType::Prefix => {
674 new_a = Alternative::new(a.v[..a.len() - 1].to_owned());
675 new_a.v.push(Symbol::NT(var_i_nt[*ivar].0));
676 }
677 AltType::Independant => panic!("there can't be an independent alternative in `alts`"),
678 }
679 new_a.flags |= a.flags & ruleflag::ALTERNATIVE_INFO;
680 new_a.origin = a.origin;
681 new_a.ambig_alt_id = Some(ambig_alt_id);
682 ambig_alt_id += 1;
683 new_a
684 }).to_vec();
685 let mut used_sym = HashSet::<Symbol>::new();
686 let mut prod_indep = new_alts.iter()
687 .zip(&pr_info)
688 .filter_map(|(nf, AltInfo { ty, .. })| if ty == &AltType::Prefix { Some(nf.clone()) } else { None })
689 .to_vec();
690 let greedy_prologue = pr_info[0].ty == AltType::Prefix && need_indep || pr_info[0].ty == AltType::RightAssoc;
693 for (i, va) in var_alts.into_iter().enumerate() {
694 let (nt, nt_loop) = var_i_nt[i];
695 let prod_nt = if let Some(nt_indep) = nt_indep_maybe {
696 prule!(nt nt_indep, nt nt_loop)
697 } else {
698 prod_indep.iter().cloned()
700 .chain(indep.iter().map(|a| {
701 let mut new_a = a.clone();
702 new_a.push(sym!(nt nt_loop));
703 new_a
704 })).collect()
705 };
706 let mut new_used_sym = Vec::<Symbol>::new();
707 let mut prod_nt_loop = va.iter().enumerate().rev().map(|(_, &a_id)| {
708 let mut f = new_alts[a_id as usize].clone();
709 f.v.push(Symbol::NT(nt_loop));
710 let sym = f.first().unwrap();
711 let is_used_sym = used_sym.contains(sym);
712 if !is_used_sym {
713 new_used_sym.push(*sym);
714 }
715 if is_used_sym || (i == 0 && greedy_prologue) {
716 f.flags |= ruleflag::GREEDY;
717 }
718 if !has_ambig && pr_info[a_id as usize].ty == AltType::Suffix {
719 self.set_flags(nt, ruleflag::PARENT_L_RECURSION);
720 self.set_flags(nt_loop, ruleflag::CHILD_L_RECURSION);
721 }
722 f
723 }).to_vec();
724 used_sym.extend(new_used_sym);
725 prod_nt_loop.push(alt!(e));
726 if i == 0 {
727 *prule = prod_nt;
728 extra_prods.push(prod_nt_loop);
729 self.symbol_table.as_mut().map(|t| {
730 assert_eq!(t.add_child_nonterminal(var), var_i_nt[0].1);
731 });
732 } else {
733 extra_prods.extend([prod_nt, prod_nt_loop]);
734 self.symbol_table.as_mut().map(|t| {
735 assert_eq!(t.add_child_nonterminal(var), var_i_nt[i].0);
736 assert_eq!(t.add_child_nonterminal(var), var_i_nt[i].1);
737 });
738 }
739 if has_ambig {
740 self.set_flags(nt, ruleflag::PARENT_L_RECURSION);
741 self.set_flags(nt_loop, ruleflag::CHILD_L_RECURSION);
742 }
743 }
744 if need_indep {
745 self.symbol_table.as_mut().map(|t| {
746 assert_eq!(t.add_child_nonterminal(var), nt_indep_maybe.unwrap());
747 });
748 }
749 if VERBOSE {
750 println!("new alternatives: {}", new_alts.iter().enumerate()
751 .filter_map(|(i, na)| if na.is_empty() { None } else { Some(format!("[{i}] {}", na.to_str(self.get_symbol_table()))) }).join(", "));
752 println!("vars: {}{}", var_i_nt.iter().take(rule_var_i + 1).enumerate().map(|(i, (v1, v2))|
753 format!("[{i}]:{},{}", Symbol::NT(*v1).to_str(self.get_symbol_table()),
754 Symbol::NT(*v2).to_str(self.get_symbol_table()))).join(" "),
755 if need_indep { format!(" [{}]:{}", var_i, Symbol::NT(var_i_nt[var_i].0).to_str(self.get_symbol_table() )) } else { String::new() });
756 }
757 if has_ambig {
758 self.set_flags(var, ruleflag::PARENT_AMBIGUITY);
759 } else if !prod_indep.is_empty() && !need_indep {
760 self.set_flags(var, ruleflag::R_RECURSION);
761 }
762 for (nt, nt_prime) in var_i_nt.into_iter().take(rule_var_i + 1) {
763 if nt != var {
764 self.set_parent(nt, var);
765 }
766 self.set_parent(nt_prime, nt);
767 }
768 if let Some(nt_indep) = nt_indep_maybe {
769 if !has_ambig && prod_indep.iter().any(|p| p.last() == Some(&Symbol::NT(nt_indep)))
770 || has_ambig && !prod_indep.is_empty()
771 {
772 self.set_flags(nt_indep, ruleflag::R_RECURSION);
773 }
774 prod_indep.extend(indep);
775 self.set_parent(nt_indep, var);
776 extra_prods.push(prod_indep);
777 }
778 self.parent.resize(var_new, None);
779 self.flags.resize(var_new, 0);
780 self.log.add_note(format!(" => {var_name} -> {}", prule_to_str(prule, self.get_symbol_table())));
781 for (v, p) in extra_prods.iter().index_start(prules.len() as VarId) {
782 self.log.add_note(
783 format!(" {} -> {}", Symbol::NT(v).to_str(self.get_symbol_table()), prule_to_str(p, self.get_symbol_table())));
784 }
785 } else if prule.iter().any(|p| !p.is_empty() && p.last().unwrap() == &symbol) {
786 if self.get_flags(var) & ruleflag::CHILD_REPEAT == 0 {
787 self.set_flags(var, ruleflag::R_RECURSION);
788 }
789 }
790 prules.extend(extra_prods);
791 let incompatibility_flags = ruleflag::R_RECURSION | ruleflag::L_FORM | ruleflag::PARENT_L_RECURSION;
792 let incompatibility_str = "<L> right recursivity + left recursivity";
793 let flags = self.get_flags(var);
794 if flags & incompatibility_flags == incompatibility_flags {
795 let tree = &self.origin.trees[var as usize];
796 self.log.add_error(format!(
797 "`{var_name} -> {}` has incompatible rule alternatives: {incompatibility_str}",
798 grtree_to_str(tree, None, None, None, self.get_symbol_table(), false)));
799 }
800 }
801 if VERBOSE {
802 println!("#prules: {}, #flags: {}, #parents:{}", prules.len(), self.flags.len(), self.parent.len());
803 if let Some(ref mut table) = self.symbol_table {
804 println!("table: {} ({})", table.get_nonterminals().join(", "), table.get_num_nt());
805 }
806 }
807 self.prules = prules;
808 self.num_nt = self.prules.len();
809 if VERBOSE {
810 println!("FINAL:");
811 self.print_rules(false, false);
812 println!("FLAGS:\n{}", self.flags.iter().index::<VarId>()
813 .map(|(v, f)| format!("- ({v:2}) {}: {}", Symbol::NT(v).to_str(self.get_symbol_table()), ruleflag::to_string(*f).join(", "))).join("\n"));
814 if let Some(table) = &self.symbol_table {
815 println!("Symbol table:\n-NT: {}\n-T: {}",
816 table.get_nonterminals().enumerate().map(|(i, nt)| format!("{i}:{nt}")).join(", "),
817 table.get_terminals().enumerate()
818 .map(|(i, (t, txt))| format!("{i}:{t}{}", if let Some(st) = txt { format!(" ({st})") } else { String::new() })).join(", "))
819 }
820 }
821 }
822
823 pub fn left_factorize(&mut self) {
837 fn similarity(a: &Alternative, b: &Alternative) -> usize {
838 a.iter().zip(b.iter()).take_while(|(a, b)| a == b).count()
839 }
840
841 const VERBOSE: bool = false;
842 self.log.add_note("removing left factorization...");
843 let mut new_var = self.get_next_available_var();
844 let mut prules = take(&mut self.prules);
846 let mut i = 0;
847 while i < prules.len() {
848 let prule = &mut prules[i];
849 let var = i as VarId;
850 i += 1;
851 if prule.len() < 2 {
852 continue
853 }
854 let mut alts = prule.clone();
855 let mut extra = Vec::<ProdRule>::new();
856 let mut changed = false;
857 alts.sort();
858 if VERBOSE { println!("{var}: {} -> {}", Symbol::NT(var).to_str(self.get_symbol_table()), alts.iter().map(|f| f.to_str(self.get_symbol_table())).join(" | ")); }
859 while alts.len() > 1 {
860 let simi = alts.windows(2).enumerate()
861 .map(|(j, x)| (j, similarity(&x[0], &x[1])))
862 .skip_while(|(_, s)| *s == 0)
863 .take_while(|(_, s)| *s != 0)
864 .to_vec();
865 if simi.is_empty() {
866 if VERBOSE { println!(" nothing to factorize"); }
867 break;
868 }
869 changed = true;
870 let min = simi.iter().map(|(_, s)| *s).min().unwrap();
871 let start = simi[0].0;
872 let stop = start + simi.len();
873 let mut factorized = Alternative::new(alts[start].v.iter().take(min).cloned().to_vec());
874 let mut child = alts.drain(start..=stop).to_vec();
875 if VERBOSE { println!("child:\n{}", child.iter().enumerate().map(|(i, c)| format!("- [{i}] = {} org = {:?}", c.to_str(self.get_symbol_table()), c.origin)).join("\n")); }
876 if child.iter().all(|f| f.is_greedy()) {
877 factorized.flags |= ruleflag::GREEDY;
878 }
879 for f in &mut child {
880 f.v.drain(0..min);
881 }
882 if child[0].v.is_empty() {
883 if self.flags[var as usize] & ruleflag::CHILD_REPEAT != 0 {
884 factorized.origin = child[0].origin;
885 }
886 child[0].v.push(Symbol::Empty);
887 let empty = child.remove(0);
888 child.push(empty);
889 }
890 let var_prime = new_var;
891 new_var += 1;
892 self.set_flags(var, ruleflag::PARENT_L_FACTOR);
893 self.set_flags(var_prime, ruleflag::CHILD_L_FACT);
894 let top = self.get_top_parent(var);
895 self.symbol_table.as_mut().map(|table| assert_eq!(table.add_child_nonterminal(top), var_prime));
896 self.set_parent(var_prime, var);
897 let symbol_prime = Symbol::NT(var_prime);
898 factorized.v.push(symbol_prime);
899 alts.insert(start, factorized);
900 if VERBOSE {
901 println!(" - similarity: {} => {}", simi.iter().map(|(j, s)| format!("{j}:{s}")).join(", "), min);
902 println!(" factorize: {}", child.iter().map(|a| a.to_str(self.get_symbol_table())).join(" | "));
903 println!(" left: {}", alts.iter().map(|a| a.to_str(self.get_symbol_table())).join(" | "));
904 }
905 extra.push(child);
906 }
907 if changed {
908 self.log.add_note(format!(
909 "- modifying: {} -> {}",
910 Symbol::NT(var).to_str(self.get_symbol_table()), prule_to_str(prule, self.get_symbol_table())));
911 self.log.add_note(format!(
912 " => {} -> {}",
913 Symbol::NT(var).to_str(self.get_symbol_table()), prule_to_str(&alts, self.get_symbol_table())));
914 *prule = alts;
915 let offset = prules.len() as VarId;
916 for (v, p) in extra.iter().index_start(offset) {
917 self.log.add_note(format!(
918 " {} -> {}",
919 Symbol::NT(v).to_str(self.get_symbol_table()), prule_to_str(p, self.get_symbol_table())));
920 }
921 prules.extend(extra);
922 }
923 }
924 self.prules = prules;
925 self.num_nt = self.prules.len();
926 }
927
928 pub(crate) fn remove_ambiguity(&self) {
929 todo!()
930 }
931
932 fn transfer_alt_flags(&mut self) {
934 const FLAG_MASK: u32 = ruleflag::L_FORM;
936
937 for (v, prule) in self.prules.iter_mut().enumerate() {
938 let flags = prule.iter().fold(0, |acc, alt| acc | alt.flags) & FLAG_MASK;
939 self.flags[v] |= flags;
940 for alt in prule.iter_mut() {
941 alt.flags &= !FLAG_MASK;
942 }
943 }
944 }
945
946 fn check_flags(&mut self) {
947 const FLAG_CHECK_MASK: u32 = ruleflag::L_FORM | ruleflag::CHILD_REPEAT | ruleflag::R_RECURSION;
948 for v in 0..self.num_nt {
949 if self.flags[v] & FLAG_CHECK_MASK == ruleflag::L_FORM {
950 if self.flags[v] & ruleflag::CHILD_L_FACT == 0 || self.flags[self.parent[v].unwrap() as usize] & ruleflag::R_RECURSION == 0 {
952 self.log.add_error(format!("{} has an illegal flag L-Form (only used with +, *, or right recursion): {}",
953 Symbol::NT(v as VarId).to_str(self.get_symbol_table()),
954 ruleflag::to_string(self.flags[v]).join(" ")
955 ));
956 }
957 }
958 }
959 }
960
961 pub fn prs_alt_origins_str(&self, ansi: bool) -> Vec<String> {
966 let mut cols = vec![vec!["ProdRuleSet".to_string(), "|".to_string(), "Original rules".to_string()]];
967 cols.push(vec!["-----------".to_string(), "|".to_string(), "--------------".to_string()]);
968 cols.extend(self.get_prules_iter()
970 .flat_map(|(v, prule)| prule.iter()
971 .map(move |alt| vec![
972 alt_to_rule_str(v, &alt.v, self.get_symbol_table()),
973 "|".to_string(),
974 if let Some((vo, ido)) = alt.origin {
975 let tree = &self.origin.trees[vo as usize];
976 let emphasis = if tree.get_root() == Some(ido) { None } else { Some(ido) };
977 let orig_rule = grtree_to_str_custom(tree, None, emphasis, Some(vo), self.get_symbol_table(), false, ansi);
978 format!("{} -> {orig_rule}", Symbol::NT(vo).to_str(self.get_symbol_table()))
979 } else {
980 String::new()
981 }
982 ])
983 ));
984 cols.extend((0..self.num_nt)
986 .filter_map(|var|
987 self.parent[var]
988 .and_then(|_| self.origin.map.get(&(var as VarId))
989 .and_then(|&(v, index)| Some((var as VarId, v, index)))))
990 .map(|(var, v, index)| vec![
991 Symbol::NT(var).to_str(self.get_symbol_table()),
992 "|".to_string(),
993 format!("{} -> {}",
994 Symbol::NT(v).to_str(self.get_symbol_table()),
995 grtree_to_str_custom(&self.origin.trees[v as usize], None, Some(index), None, self.get_symbol_table(), false, ansi))
996
997 ]));
998 columns_to_str(cols, None)
999 }
1000}
1001
1002impl<T> LogReader for ProdRuleSet<T> {
1003 type Item = BufLog;
1004
1005 fn get_log(&self) -> &Self::Item {
1006 &self.log
1007 }
1008
1009 fn give_log(self) -> Self::Item {
1010 self.log
1011 }
1012}
1013
1014impl<T> HasBuildErrorSource for ProdRuleSet<T> {
1015 const SOURCE: BuildErrorSource = BuildErrorSource::ProdRuleSet;
1016}
1017
1018impl ProdRuleSet<General> {
1019 fn with_capacity(capacity: usize) -> Self {
1020 Self {
1021 prules: Vec::with_capacity(capacity),
1022 origin: Origin::new(),
1023 num_nt: 0,
1024 num_t: 0,
1025 symbol_table: None,
1026 flags: Vec::with_capacity(capacity),
1027 parent: Vec::with_capacity(capacity),
1028 start: None,
1029 name: None,
1030 nt_conversion: HashMap::new(),
1031 log: BufLog::new(),
1032 _phantom: PhantomData
1033 }
1034 }
1035}
1036
1037impl ProdRuleSet<LL1> {
1038 pub(super) fn calc_table(&mut self, first: &HashMap<Symbol, HashSet<Symbol>>, follow: &HashMap<Symbol, HashSet<Symbol>>, error_recovery: bool) -> LLParsingTable {
1047 fn add_table(table: &mut Vec<Vec<AltId>>, num_t: usize, nt_id: VarId, t_id: VarId, a_id: AltId) {
1048 let pos = nt_id as usize * num_t + t_id as usize;
1049 table[pos].push(a_id);
1050 }
1051 const VERBOSE: bool = false;
1052 const DISABLE_FILTER: bool = false;
1053 if !self.log.has_no_errors() {
1054 return LLParsingTable::new();
1055 }
1056 let mut alts = self.prules.iter().index().filter(|(v, _)| DISABLE_FILTER || first.contains_key(&Symbol::NT(*v)))
1057 .flat_map(|(v, x)| x.iter().map(move |a| (v, a.clone()))).to_vec();
1058 let error_skip = alts.len() as AltId; let error_pop = error_skip + 1; let num_nt = self.num_nt;
1061 let num_t = self.num_t + 1;
1062 let end = (num_t - 1) as VarId; let mut used_t = HashSet::<Symbol>::new();
1064 let mut table: Vec<Vec<AltId>> = vec![vec![]; num_nt * num_t];
1065 for (a_id, (nt_id, alt)) in alts.iter().index() {
1066 used_t.extend(alt.iter().filter(|s| s.is_t()));
1067 if VERBOSE { println!("- {a_id}: {} -> {} => {}", Symbol::NT(*nt_id).to_str(self.get_symbol_table()),
1068 alt.to_str(self.get_symbol_table()),
1069 alt.calc_alt_first(first).iter().map(|s| s.to_str(self.get_symbol_table())).join(" ")); }
1070 let mut has_end = false;
1071 let mut has_empty = false;
1072 for s in alt.calc_alt_first(first) {
1073 match s {
1074 Symbol::Empty => {
1075 has_empty = true;
1076 for s in &follow[&Symbol::NT(*nt_id)] {
1077 match s {
1078 Symbol::T(t_id) => add_table(&mut table, num_t, *nt_id, *t_id, a_id),
1079 Symbol::End => add_table(&mut table, num_t, *nt_id, end, a_id),
1080 _ => {}
1081 }
1082 }
1083 }
1084 Symbol::T(t_id) => {
1085 add_table(&mut table, num_t, *nt_id, t_id, a_id);
1086 }
1087 Symbol::NT(_) => {}
1088 Symbol::End => {
1089 has_end = true;
1090 }
1091 }
1092 }
1093 if has_empty && has_end {
1094 add_table(&mut table, num_t, *nt_id, end, end);
1095 }
1096 }
1097 let mut final_table = Vec::<AltId>::new();
1099 for nt_id in 0..num_nt {
1100 for t_id in 0..num_t {
1101 let pos = nt_id * num_t + t_id;
1102 final_table.push(match table[pos].len() {
1103 0 => {
1104 if error_recovery {
1105 let sym_t = if t_id < num_t - 1 { Symbol::T(t_id as TokenId) } else { Symbol::End };
1106 let sym_nt = Symbol::NT(nt_id as VarId);
1107 if follow[&sym_nt].contains(&sym_t) || first[&sym_nt].contains(&sym_t) {
1108 error_pop
1109 } else {
1110 error_skip
1111 }
1112 } else {
1113 error_skip
1114 }
1115 },
1116 1 => *table[pos].first().unwrap(),
1117 _ => {
1118 let greedies = table[pos].iter().filter(|&a_id| alts[*a_id as usize].1.is_greedy()).cloned().to_vec();
1120 if greedies.len() == 1 {
1121 let chosen = greedies[0];
1122 self.log.add_note(
1123 format!("- calc_table: expected ambiguity for NT '{}', T '{}': {} => <{}> is specified as greedy and has been chosen",
1124 Symbol::NT(nt_id as VarId).to_str(self.get_symbol_table()),
1125 if t_id < self.num_t { Symbol::T(t_id as VarId).to_str(self.get_symbol_table()) } else { "<EOF>".to_string() },
1126 table[pos].iter().map(|a_id|
1127 format!("<{}>", alts[*a_id as usize].1.to_str(self.get_symbol_table()))).join(" or "),
1128 alts[chosen as usize].1.to_str(self.get_symbol_table())
1129 ));
1130 table[pos] = greedies;
1131 chosen
1132 } else {
1133 let row = (0..num_t).filter(|j| *j != t_id).flat_map(|j| &table[nt_id * num_t + j]).collect::<HashSet<_>>();
1134 let chosen = *table[pos].iter().find(|a| !row.contains(a)).unwrap_or(&table[pos][0]);
1135 self.log.add_warning(
1136 format!("- calc_table: ambiguity for NT '{}', T '{}': {} => <{}> has been chosen",
1137 Symbol::NT(nt_id as VarId).to_str(self.get_symbol_table()),
1138 if t_id < self.num_t { Symbol::T(t_id as VarId).to_str(self.get_symbol_table()) } else { "<EOF>".to_string() },
1139 table[pos].iter().map(|a_id|
1140 format!("<{}>", alts[*a_id as usize].1.to_str(self.get_symbol_table()))).join(" or "),
1141 alts[chosen as usize].1.to_str(self.get_symbol_table())
1142 ));
1143 table[pos] = vec![chosen];
1144 chosen
1145 }
1146 }
1147 });
1148 }
1149 }
1150 if !(0..num_t - 1).any(|t_id| (0..num_nt).any(|nt_id| final_table[nt_id * num_t + t_id] < error_skip)) {
1151 self.log.add_error("- calc_table: no terminal used in the table".to_string());
1152 }
1153 for (_, a) in &mut alts {
1154 a.flags &= !ruleflag::GREEDY;
1155 }
1156 LLParsingTable { num_nt, num_t, alts, table: final_table, flags: self.flags.clone(), parent: self.parent.clone() }
1157 }
1158
1159 pub fn make_parsing_table(&mut self, error_recovery: bool) -> LLParsingTable {
1160 self.log.add_note("calculating parsing table...");
1161 let first = self.calc_first();
1162 let follow = self.calc_follow(&first);
1163 self.calc_table(&first, &follow, error_recovery)
1164 }
1165
1166 pub fn gen_tables_source_code(&self, indent: usize) -> String {
1167 let st = self.symbol_table.as_ref().unwrap();
1168 let mut source = Vec::<String>::new();
1169 source.push(format!("static ORIGIN: [(Option<usize>, &[(GrNode, &[usize])]); {}] = [", self.origin.trees.len()));
1171 for t in &self.origin.trees {
1172 let tree_str = (0..t.len()).into_iter()
1173 .map(|i| format!("({}, &[{}])", t.get(i).gen_source_code(), t.children(i).into_iter().join(",")))
1174 .join(", ");
1175 source.push(format!(" ({:?}, &[{}]),", t.get_root(), tree_str));
1176 }
1177 source.push("];".to_string());
1178 source.push(format!("static MAP: [(VarId, (VarId, usize)); {}] = [", self.origin.map.len()));
1179 let mut sorted_map = self.origin.map.iter().to_vec();
1180 sorted_map.sort(); source.extend(sorted_map.chunks(5)
1182 .map(|chk| format!(" {},", chk.into_iter().map(|(a, (c, d))| format!("({a}, ({c}, {d}))")).join(", "))));
1183 source.push("];".to_string());
1184 source.push("let origin = Origin::from_data(".to_string());
1185 source.push(" ORIGIN.into_iter().map(|(root, nodes)| GrTree::from((root, nodes.to_vec()))).collect(),".to_string());
1186 source.push(" HashMap::from(MAP));".to_string());
1187 source.push(String::new());
1189 source.push("let ll1_tables = ProdRuleSetTables::new(".to_string());
1190 source.push(format!(" {:?},", self.name));
1191 source.push(" vec![".to_string());
1192 source.extend(self.prules.iter().map(|prule| format!(" {},", prule_to_macro(prule))));
1193 source.push(" ],".to_string());
1194 source.push(" origin,".to_string());
1195 source.push(format!(" vec![{}],", st.get_terminals().map(|x| format!("{x:?}")).join(", ")));
1196 source.push(format!(" vec![{}],", st.get_nonterminals().map(|x| format!("{x:?}")).join(", ")));
1197 source.push(format!(" vec![{}],", self.flags.iter().join(", ")));
1198 source.push(format!(" vec![{}],", self.parent.iter().map(|p_maybe| format!("{p_maybe:?}")).join(", ")));
1199 source.push(format!(" {:?},", self.start));
1200 source.push(format!(" hashmap![{}]", self.nt_conversion.iter().map(|(v, conv)| format!("{v} => {conv:?}")).join(", ")));
1201 source.push(");".to_string());
1202 indent_source(vec![source], indent)
1203 }
1204}
1205
1206pub struct ProdRuleSetTables {
1209 name: Option<String>,
1210 prules: Vec<ProdRule>,
1211 origin: Origin<VarId, FromPRS>,
1212 t: Vec<(String, Option<String>)>, nt: Vec<String>, flags: Vec<u32>,
1215 parent: Vec<Option<VarId>>,
1216 start: Option<VarId>,
1217 nt_conversion: HashMap<VarId, NTConversion>,
1218}
1219
1220impl ProdRuleSetTables {
1221 pub fn new<T: Into<String>>(
1222 name: Option<T>,
1223 prules: Vec<ProdRule>,
1224 origin: Origin<VarId, FromPRS>,
1225 t: Vec<(T, Option<T>)>,
1226 nt: Vec<T>,
1227 flags: Vec<u32>,
1228 parent: Vec<Option<VarId>>,
1229 start: Option<VarId>,
1230 nt_conversion: HashMap<VarId, NTConversion>,
1231 ) -> Self {
1232 let t = t.into_iter().map(|(t, t_maybe)| (t.into(), t_maybe.map(|t| t.into()))).collect();
1233 let nt = nt.into_iter().map(|nt| nt.into()).collect();
1234 ProdRuleSetTables {
1235 name: name.map(|s| s.into()),
1236 prules,
1237 origin, t, nt, flags, parent, start, nt_conversion,
1238 }
1239 }
1240
1241 pub fn get_name(&self) -> Option<&String> {
1242 self.name.as_ref()
1243 }
1244}
1245
1246impl BuildFrom<ProdRuleSetTables> for ProdRuleSet<LL1> {
1247 fn build_from(source: ProdRuleSetTables) -> Self {
1248 let mut symbol_table = SymbolTable::new();
1249 symbol_table.extend_terminals(source.t);
1250 symbol_table.extend_nonterminals(source.nt);
1251 ProdRuleSet {
1252 prules: source.prules,
1253 origin: source.origin,
1254 num_nt: symbol_table.get_num_nt(),
1255 num_t: symbol_table.get_num_t(),
1256 symbol_table: Some(symbol_table),
1257 flags: source.flags,
1258 parent: source.parent,
1259 start: source.start,
1260 name: source.name,
1261 nt_conversion: source.nt_conversion,
1262 log: BufLog::new(),
1263 _phantom: PhantomData,
1264 }
1265 }
1266}
1267
1268impl BuildFrom<RuleTreeSet<Normalized>> for ProdRuleSet<General> {
1271 fn build_from(mut rules: RuleTreeSet<Normalized>) -> Self {
1276 fn children_to_vec(tree: &GrTree, parent_id: usize) -> Alternative {
1277 let mut flags: u32 = 0;
1278 let alt = tree.children(parent_id).iter()
1279 .map(|id| tree.get(*id))
1280 .filter(|node| {
1281 match **node {
1282 GrNode::RAssoc => {
1283 flags |= ruleflag::R_ASSOC;
1284 false
1285 }
1286 GrNode::LForm(_) => {
1287 flags |= ruleflag::L_FORM;
1288 false
1289 }
1290 GrNode::PrecEq => {
1291 flags |= ruleflag::PREC_EQ;
1292 false
1293 }
1294 _ => true,
1295 }
1296 })
1297 .map(|node| {
1298 match node {
1299 GrNode::Symbol(s) => s.clone(),
1300 x => panic!("unexpected symbol {x} under &")
1301 }
1302 }).to_vec();
1303 Alternative::new(alt).with_flags(flags)
1304 }
1305 let mut prules = Self::with_capacity(rules.trees.len());
1306 prules.start = rules.start;
1307 prules.symbol_table = rules.symbol_table;
1308 prules.flags = rules.flags;
1309 prules.parent = rules.parent;
1310 prules.nt_conversion = rules.nt_conversion;
1311 prules.log = rules.log;
1312 if !prules.log.has_no_errors() {
1313 return prules;
1317 }
1318 prules.origin = Origin::<VarId, FromPRS>::from_trees_mut(&mut rules.origin.trees);
1319 for (var, tree) in rules.trees.iter().index() {
1320 if !tree.is_empty() {
1321 let root = tree.get_root().expect("tree {var} has no root");
1322 let root_sym = tree.get(root);
1323 let mut prule = match root_sym {
1324 GrNode::Symbol(s) => {
1325 let mut alt = Alternative::new(vec![s.clone()]);
1326 if let Some(&(v, ch)) = rules.origin.map.get(&(var, root)) {
1327 alt.origin = Some((v, ch));
1328 }
1329 vec![alt]
1330 },
1331 GrNode::Concat => {
1332 let mut alt = children_to_vec(tree, root);
1333 if let Some(&(v, ch)) = rules.origin.map.get(&(var, root)) {
1334 alt.origin = Some((v, ch));
1335 }
1336 vec![alt]
1337 },
1338 GrNode::Or => tree.children(root).iter()
1339 .map(|id| {
1340 let child = tree.get(*id);
1341 let mut alt = if let GrNode::Symbol(s) = child {
1342 Alternative::new(vec![s.clone()])
1343 } else {
1344 assert_eq!(*child, GrNode::Concat, "unexpected symbol {child} under |");
1345 children_to_vec(tree, *id)
1346 };
1347 if let Some(&(v, ch)) = rules.origin.map.get(&(var, *id)) {
1348 alt.origin = Some((v, ch));
1349 }
1350 alt
1351 }).to_vec(),
1352 s => panic!("unexpected symbol {s} as root of normalized GrTree for NT {}", Symbol::NT(var).to_str(prules.get_symbol_table()))
1353 };
1354 if prule.iter().any(|f| f.flags & ruleflag::L_FORM != 0) {
1355 prules.set_flags(var, ruleflag::L_FORM);
1368 for a in prule.iter_mut() {
1370 a.flags &= !ruleflag::L_FORM;
1371 }
1372 }
1373 if prules.parent[var as usize].is_some() {
1376 if let Some(&(v, index)) = rules.origin.map.get(&(var, root)) {
1377 prules.origin.add(var, (v, index));
1378 }
1379 }
1380 prules.prules.push(prule);
1381 } else {
1382 prules.prules.push(ProdRule::new()); }
1384 }
1385 prules.calc_num_symbols();
1386 prules
1387 }
1388}
1389
1390impl BuildFrom<RuleTreeSet<General>> for ProdRuleSet<General> {
1391 fn build_from(rules: RuleTreeSet<General>) -> Self {
1396 let mut prs = ProdRuleSet::build_from(RuleTreeSet::<Normalized>::build_from(rules));
1397 if prs.log.has_no_errors() {
1398 prs.simplify();
1399 }
1400 prs
1401 }
1402}
1403
1404impl BuildFrom<ProdRuleSet<General>> for ProdRuleSet<LL1> {
1405 fn build_from(mut rules: ProdRuleSet<General>) -> Self {
1406 if rules.log.has_no_errors() {
1407 rules.remove_recursion();
1408 rules.left_factorize();
1409 rules.transfer_alt_flags();
1410 rules.check_flags();
1411 }
1412 ProdRuleSet::<LL1> {
1413 prules: rules.prules,
1414 origin: rules.origin,
1415 num_nt: rules.num_nt,
1416 num_t: rules.num_t,
1417 symbol_table: rules.symbol_table,
1418 flags: rules.flags,
1419 parent: rules.parent,
1420 start: rules.start,
1421 name: rules.name,
1422 nt_conversion: rules.nt_conversion,
1423 log: rules.log,
1424 _phantom: PhantomData,
1425 }
1426 }
1427}
1428
1429impl BuildFrom<ProdRuleSet<General>> for ProdRuleSet<LR> {
1430 fn build_from(mut rules: ProdRuleSet<General>) -> Self {
1431 if rules.log.has_no_errors() {
1432 rules.remove_ambiguity();
1433 rules.transfer_alt_flags();
1434 rules.check_flags();
1435 }
1436 ProdRuleSet::<LR> {
1437 prules: rules.prules,
1438 origin: rules.origin,
1439 num_nt: rules.num_nt,
1440 num_t: rules.num_t,
1441 symbol_table: rules.symbol_table,
1442 flags: rules.flags,
1443 parent: rules.parent,
1444 start: rules.start,
1445 name: rules.name,
1446 nt_conversion: rules.nt_conversion,
1447 log: rules.log,
1448 _phantom: PhantomData,
1449 }
1450 }
1451}
1452
1453impl<T> ProdRuleSet<T> {
1457 pub fn print_rules(&self, as_comment: bool, filter_empty_nt: bool) {
1458 let prefix = if as_comment { " // " } else { " " };
1459 println!("{prefix}{}",
1460 self.get_prules_iter()
1461 .filter(|(_, rule)| !filter_empty_nt || **rule != prule!(e))
1462 .map(|(var, p)|
1463 format!("({var}) {} -> {}",
1464 Symbol::NT(var).to_str(self.get_symbol_table()),
1465 prule_to_str(p, self.get_symbol_table())))
1466 .join(&format!("\n{prefix}")));
1467 }
1468
1469 pub fn print_alts(&self) {
1470 println!("Alternatives:\n{}",
1471 self.get_alts().enumerate().map(|(id, (v, a))|
1472 format!(" // - {id}: {} -> {}{}",
1473 Symbol::NT(v).to_str(self.get_symbol_table()),
1474 a.iter().map(|s| s.to_str(self.get_symbol_table())).join(" "),
1475 if a.flags != 0 { format!(" {} ({})", ruleflag::to_string(a.flags).join(" | "), a.flags) } else { "".to_string() }
1476 )
1477 ).join("\n"));
1478 }
1479}
1480
1481impl LLParsingTable {
1482 pub fn print(&self, symbol_table: Option<&SymbolTable>, indent: usize) {
1483 let LLParsingTable { num_nt, num_t, alts, table, .. } = self;
1484 let error_skip = alts.len() as AltId;
1485 let error_pop = error_skip + 1;
1486 let str_nt = (0..*num_nt).map(|i| Symbol::NT(i as VarId).to_str(symbol_table)).to_vec();
1487 let max_nt_len = str_nt.iter().map(|s| s.len()).max().unwrap();
1488 let str_t = (0..*num_t).map(|j| if j + 1 < *num_t { Symbol::T(j as VarId).to_str(symbol_table) } else { "$".to_string() }).to_vec();
1489 let t_len = str_t.iter().map(|s| s.len().max(3)).to_vec();
1490 println!("{:<i$}// {:<w$} | {}", "", "", (0..*num_t).map(|j| format!("{:^w$}", str_t[j], w = t_len[j])).join(" "), w = max_nt_len, i = indent);
1491 println!("{:<i$}// {:-<w$}-+-{:-<t$}", "", "", "", w = max_nt_len, t = *num_t + t_len.iter().sum::<usize>(), i = indent);
1492 for i in 0..*num_nt {
1493 print!("{:<i$}// {:<w$} |", "", str_nt[i], w = max_nt_len, i = indent);
1494 for j in 0..*num_t {
1495 let value = table[i * num_t + j];
1496 if value < error_skip {
1497 print!(" {:^w$}", value, w = t_len[j]);
1498 } else {
1499 print!(" {:^w$}", if value == error_pop { "p" } else { "." }, w = t_len[j]);
1500 }
1501 }
1502 println!();
1503 }
1504 }
1505}