1use super::*;
4use crate::{columns_to_str, AltId, CharLen, SymbolTable, VarId};
5use lexigram_core::alt::{alt_to_rule_str, Alternative};
6use lexigram_core::CollectJoin;
7use lexigram_core::log::LogMsg;
8
9pub type ProdRule = Vec<Alternative>;
21
22pub fn prule_to_str(prule: &ProdRule, symbol_table: Option<&SymbolTable>) -> String {
23 prule.iter().map(|alt| alt.to_str(symbol_table)).join(" | ")
24}
25
26pub fn prule_to_rule_str(nt: VarId, prule: &ProdRule, symbol_table: Option<&SymbolTable>) -> String {
27 format!("{} -> {}", Symbol::NT(nt).to_str(symbol_table), prule.iter().map(|alt| alt.to_str(symbol_table)).join(" | "))
28
29}
30
31pub fn prule_to_macro(prule: &ProdRule) -> String {
32 format!("prule!({})", prule.iter().map(|alt| alt.to_macro_item()).join("; "))
33}
34
35#[derive(Clone, Copy, PartialEq, Debug)]
36enum AltType { Independant, LeftAssoc, Prefix, RightAssoc, Suffix }
37
38impl AltType {
39 fn from(nt: &Symbol, alt: &Alternative) -> Self {
40 let left = matches!(alt.first(), Some(var) if var == nt);
41 let right = alt.len() > 1 && matches!(alt.last(), Some(var) if var == nt);
42 match (left, right) {
43 (false, false) => AltType::Independant,
44 (false, true) => AltType::Prefix,
45 (true, false) => AltType::Suffix,
46 (true, true) => if alt.flags & ruleflag::R_ASSOC != 0 { AltType::RightAssoc } else { AltType::LeftAssoc },
47 }
48 }
49}
50
51#[derive(Debug, Clone)]
52struct AltInfo {
53 #[allow(unused)]
54 pred_priority: Option<AltId>,
55 ivar: usize,
56 ty: AltType
57}
58
59#[derive(Debug)]
60pub struct LLParsingTable {
61 pub num_nt: usize,
62 pub num_t: usize, pub alts: Vec<(VarId, Alternative)>,
64 pub table: Vec<AltId>,
65 pub flags: Vec<u32>, pub parent: Vec<Option<VarId>>, }
68
69impl LLParsingTable {
70 pub fn new() -> Self {
71 LLParsingTable { num_nt: 0, num_t: 0, alts: vec![], table: vec![], flags: vec![], parent: vec![] }
72 }
73
74 pub fn get_top_parent(&self, nt: VarId) -> VarId {
75 let mut var = nt;
76 while let Some(parent) = self.parent[var as usize] {
77 var = parent;
78 }
79 var
80 }
81}
82
83impl Default for LLParsingTable {
84 fn default() -> Self {
85 Self::new()
86 }
87}
88
89#[derive(Clone, Debug)]
90pub struct ProdRuleSet<T> {
91 pub(super) prules: Option<Vec<ProdRule>>,
92 pub(crate) origin: Origin<VarId, FromPRS>,
93 pub(super) num_nt: usize,
94 pub(super) num_t: usize,
95 pub(crate) symbol_table: Option<SymbolTable>,
96 pub(super) flags: Vec<u32>,
97 pub(super) parent: Vec<Option<VarId>>,
98 pub(super) start: Option<VarId>,
99 pub(crate) name: Option<String>,
100 pub(crate) nt_conversion: HashMap<VarId, NTConversion>,
101 pub(crate) log: BufLog,
102 pub(crate) disable_warning_unused_nt_t: bool,
103 pub(super) _phantom: PhantomData<T>
104}
105
106impl<T> ProdRuleSet<T> {
107 pub fn get_start(&self) -> Option<VarId> {
109 self.start
110 }
111
112 pub fn set_start(&mut self, start: VarId) {
114 self.start = Some(start);
115 }
116
117 pub fn set_disable_warning_unused_nt_t(&mut self, flag: bool) {
120 self.disable_warning_unused_nt_t = flag;
121 }
122
123 pub fn get_name(&self) -> Option<&String> {
124 self.name.as_ref()
125 }
126
127 pub fn set_name(&mut self, name: Option<String>) {
128 self.name = name;
129 }
130
131 pub fn get_next_available_var(&self) -> VarId {
133 self.prules.as_ref().unwrap().len() as VarId }
135
136 pub fn get_prules_iter(&self) -> impl Iterator<Item=(VarId, &ProdRule)> {
138 self.prules.as_ref().unwrap().iter().index().filter_map(|(id, p)| if p.is_empty() { None } else { Some((id, p)) })
139 }
140
141 pub fn get_prules_iter_mut(&mut self) -> impl Iterator<Item=(VarId, &mut ProdRule)> {
142 self.prules.as_mut().unwrap().iter_mut().enumerate().filter_map(|(id, p)| if p.is_empty() { None } else { Some((id as VarId, p)) })
143 }
144
145 pub fn get_alts(&self) -> impl Iterator<Item=(VarId, &Alternative)> {
146 self.prules.as_ref().unwrap().iter().enumerate()
147 .flat_map(|(v, p)| p.iter().map(move |alt| (v as VarId, alt)))
148 }
149
150 pub fn set_symbol_table(&mut self, symbol_table: SymbolTable) {
151 self.symbol_table = Some(symbol_table);
152 }
153
154 pub fn get_symbol_table(&self) -> Option<&SymbolTable> {
155 self.symbol_table.as_ref()
156 }
157
158 pub fn get_num_nt(&self) -> usize {
159 self.num_nt
160 }
161
162 pub fn get_num_t(&self) -> usize {
163 self.num_t
164 }
165
166 pub fn set_flags(&mut self, nt: VarId, new_flags: u32) {
169 let nt = nt as usize;
170 if nt >= self.flags.len() {
171 self.flags.resize(nt + 1, 0);
172 }
173 self.flags[nt] |= new_flags;
174 }
175
176 pub fn get_flags(&self, nt: VarId) -> u32 {
177 let nt = nt as usize;
178 if nt < self.flags.len() {
179 self.flags[nt]
180 } else {
181 0
182 }
183 }
184
185 pub(super) fn set_parent(&mut self, child: VarId, parent: VarId) {
186 let child = child as usize;
187 if child >= self.parent.len() {
188 self.parent.resize(child + 1, None);
189 }
190 self.parent[child] = Some(parent);
191 }
192
193 #[allow(unused)]
194 #[cfg(test)] pub(super) fn get_parent(&self, child: VarId) -> Option<VarId> {
196 let child = child as usize;
197 if child >= self.parent.len() {
198 None
199 } else {
200 self.parent[child]
201 }
202 }
203
204 fn get_top_parent(&self, nt: VarId) -> VarId {
205 let mut var = nt;
206 while let Some(parent) = self.parent[var as usize] {
207 var = parent;
208 }
209 var
210 }
211
212 pub(super) fn calc_num_symbols(&mut self) {
219 self.num_nt = self.prules.as_ref().unwrap().len();
220 self.num_t = self.prules.as_ref().unwrap().iter().map(|p|
221 p.iter().map(|f|
222 f.iter().filter_map(|s|
223 if let Symbol::T(v) = s { Some(*v + 1) } else { None }
224 ).max().unwrap_or(0)
225 ).max().unwrap_or(0)
226 ).max().unwrap_or(0) as usize;
227 if let Some(st) = self.symbol_table.as_mut() {
228 st.downsize_num_t(self.num_t)
229 }
230 self.flags.resize(self.num_nt, 0);
231 self.parent.resize(self.num_nt, None);
232 }
233
234 pub(super) fn simplify(&mut self) {
236 for p in self.prules.as_mut().unwrap() {
237 let mut has_empty = false;
238 let mut i = 0;
239 while i < p.len() {
240 let alt = p.get_mut(i).unwrap();
241 let mut j = 0;
242 while j < alt.len() {
243 if alt[j].is_empty() && (j > 0 || j + 1 < alt.len()) {
244 alt.v.remove(j);
245 } else {
246 j += 1;
247 }
248 }
249 if alt.len() == 1 && alt[0].is_empty() {
250 if has_empty {
251 p.remove(i);
252 } else {
253 has_empty = true;
254 i += 1;
255 }
256 } else {
257 i += 1;
258 }
259 }
260 }
261 }
262
263 fn check_num_nt_coherency(&mut self) {
264 if let Some(n) = self.symbol_table.as_ref().map(|table| table.get_num_nt()) {
265 let num_nt = self.prules.as_ref().unwrap().len();
266 if n != num_nt {
267 self.log.add_error(format!("there are {num_nt} rules but the symbol table has {n} nonterminal symbols: dropping the table"));
268 self.symbol_table = None;
269 }
270 }
271 }
272
273 fn cleanup_symbols(&mut self, keep: &mut HashSet<Symbol>) {
278 const VERBOSE: bool = false;
279 let mut nt_content = (0..self.num_nt).map(|v| if self.parent[v].is_none() { Some(v as VarId) } else { None }).to_vec();
280 for (from, cnv) in self.nt_conversion.iter() {
281 let MovedTo(to) = cnv else { panic!("{cnv:?} unexpected") };
283 nt_content[*from as usize] = None;
284 nt_content[*to as usize] = Some(*from);
285 }
286 if VERBOSE {
287 println!("Removing unused nonterminals:");
288 let mut all_h = self.prules.as_ref().unwrap().iter().flat_map(|p| p.iter().flat_map(|x| &x.v)).cloned().collect::<HashSet<_>>();
289 all_h.extend((0..self.num_nt).map(|i| Symbol::NT(i as VarId)));
290 let mut all = all_h.into_iter().collect::<Vec<_>>();
291 all.sort();
292 println!(" current NT symbols: {}", all.iter().filter_map(|s|
293 if let Symbol::NT(_) = s { Some(s.to_str(self.get_symbol_table())) } else { None }).join(", "));
294 println!(" current T symbols: {}", all.iter().filter_map(|s|
295 if let Symbol::T(_) = s { Some(s.to_str(self.get_symbol_table())) } else { None }).join(" "));
296 let mut used = keep.iter().collect::<Vec<_>>();
297 used.sort();
298 println!(" used NT symbols: {}", used.iter().filter_map(|s| if let Symbol::NT(v) = s { Some((v, s)) } else { None })
299 .enumerate().map(|(new_id, (id, s))| format!("{}({id} -> {new_id})", s.to_str(self.get_symbol_table()))).join(", "));
300 println!(" used T symbols: {}", used.iter().filter_map(|s|
301 if let Symbol::T(_) = s { Some(s.to_str(self.get_symbol_table())) } else { None }).join(" "));
302 println!(" nt_conversion: {:?}", self.nt_conversion);
303 println!(" nt_content: {}", nt_content.iter().enumerate()
304 .filter_map(|(v, f)| f.as_ref().map(|from| format!("{v}:{from}")))
305 .join(", ")
306 );
307 }
308 self.nt_conversion.clear();
309 let new_num_nt = keep.iter().filter(|s| matches!(s, Symbol::NT(_))).count() as VarId;
310 let mut new_v = new_num_nt;
311 let mut conv = HashMap::<VarId, VarId>::new();
312 for i in (0..self.num_nt).rev() {
313 let v = i as VarId;
314 let symbol = Symbol::NT(v);
315 if !keep.contains(&symbol) {
316 if VERBOSE { println!("- deleting {}", symbol.to_str(self.get_symbol_table())); }
317 if self.parent[i].is_none() {
318 self.nt_conversion.insert(v, Removed);
319 }
320 nt_content.remove(i);
321 self.prules.as_mut().unwrap().remove(i);
322 self.start = self.start.map(|s| if s >= v { s - 1 } else { s });
323 if let Some(t) = self.symbol_table.as_mut() { t.remove_nonterminal(v) };
324 self.flags.remove(i);
325 self.parent.remove(i);
326 if self.origin.trees.len() > i {
327 self.origin.trees.remove(i);
328 }
329
330 } else {
331 new_v -= 1;
332 conv.insert(v, new_v);
333 if VERBOSE { println!("- {symbol:?} -> {:?}", Symbol::NT(new_v)); }
334 }
335 }
336 for p in self.prules.as_mut().unwrap() {
337 for f in p {
338 for s in &mut f.v {
339 if let Symbol::NT(s_var) = s {
340 if let Some(new) = conv.get(s_var) {
341 *s = Symbol::NT(*new);
342 }
343 }
344 }
345 if let Some((ref mut var, _id)) = f.origin {
346 if let Some(new_var) = conv.get(var) {
347 *var = *new_var;
348 }
349 }
350 }
351 }
352 for p in &mut self.parent {
353 if let Some(parent) = p {
354 if let Some(new) = conv.get(parent) {
355 *p = Some(*new);
356 }
357 }
358 }
359 for t in &mut self.origin.trees {
360 for mut node in t.iter_post_depth_simple_mut() {
361 if let GrNode::Symbol(Symbol::NT(ref mut v)) = *node {
362 if let Some(new_v) = conv.get(v) {
363 *v = *new_v;
364 }
365 }
366 }
367 }
368 let new_map = self.origin.map.iter()
369 .map(|(v1, (v2, id))| (*conv.get(v1).unwrap_or(v1), (*conv.get(v2).unwrap_or(v2), *id)))
370 .collect::<HashMap<_, _>>();
371 self.origin.map = new_map;
375 keep.retain(|s| !matches!(s, Symbol::NT(_)));
376 keep.extend((0..new_num_nt as VarId).map(Symbol::NT));
377 self.num_nt = new_num_nt as usize;
378 if VERBOSE {
379 println!("-> nt_content: {}", nt_content.iter().enumerate()
380 .filter_map(|(v, f)| f.map(|from| format!("{v}:{from}")))
381 .join(", ")
382 );
383 }
384 for (to, f) in nt_content.into_iter().index() {
385 if let Some(from) = f {
386 if from != to {
387 self.nt_conversion.insert(from, MovedTo(to as VarId));
388 } else if self.nt_conversion.get(&from) == Some(&Removed) {
389 self.nt_conversion.remove(&from);
390 }
391 }
392 };
393 if VERBOSE { println!("-> nt_conversion: {:?}", self.nt_conversion); }
394 }
395
396 pub fn calc_first(&mut self) -> HashMap<Symbol, HashSet<Symbol>> {
397 const VERBOSE: bool = false;
398 if self.start.is_none() {
399 self.log.add_error("calc_first: start NT symbol not defined");
400 }
401 if self.num_nt == 0 {
402 self.log.add_error("calc_first: no nonterminal in grammar".to_string());
403 }
404 if !self.log.has_no_errors() {
405 return HashMap::new();
406 }
407 let mut symbols = HashSet::<Symbol>::new();
408 let mut stack = vec![Symbol::NT(self.start.unwrap())];
409 while let Some(sym) = stack.pop() {
410 if !symbols.contains(&sym) {
411 symbols.insert(sym);
412 if let Symbol::NT(v) = sym {
413 stack.extend(self.prules.as_ref().unwrap()[v as usize].iter().flat_map(|x| &x.v));
414 }
415 }
416 }
417 if symbols.iter().filter(|s| matches!(s, Symbol::NT(_))).count() != self.num_nt {
418 let nt_removed = (0..self.num_nt as VarId)
419 .filter(|v| !symbols.contains(&Symbol::NT(*v)) && !matches!(self.nt_conversion.get(v), Some(MovedTo(_))))
421 .map(Symbol::NT)
422 .to_vec();
423 if !nt_removed.is_empty() {
424 let msg = format!(
425 "calc_first: unused nonterminals: {}",
426 nt_removed.into_iter().map(|s| format!("{:?} = {}", s, s.to_str(self.get_symbol_table()))).join(", "));
427 if !self.disable_warning_unused_nt_t {
428 self.log.add_warning(msg);
429 } else {
430 self.log.add_note(msg);
431 }
432 }
433 self.cleanup_symbols(&mut symbols);
434 }
435
436 let unused_t = (0..self.num_t)
437 .filter_map(|t_id| {
438 let s = Symbol::T(t_id as VarId);
439 if !symbols.contains(&s) { Some(format!("T({t_id}) = {}", s.to_str(self.get_symbol_table()))) } else { None }
440 }).to_vec();
441 if !unused_t.is_empty() {
442 let msg = format!("calc_first: unused terminals: {}", unused_t.join(", "));
443 if !self.disable_warning_unused_nt_t {
444 self.log.add_warning(msg)
445 } else {
446 self.log.add_note(msg);
447 }
448 }
449
450 let mut first = symbols.into_iter().map(|sym| {
451 match &sym {
452 Symbol::T(_) | Symbol::Empty => (sym, hashset![sym]),
453 Symbol::NT(_) => (sym, HashSet::new()),
454 Symbol::End => panic!("found reserved symbol {sym:?} in production rules"),
455 }
456 }).collect::<HashMap<_, _>>();
457 let mut change = true;
458 let rules = (0..self.num_nt as VarId).filter(|var| first.contains_key(&Symbol::NT(*var))).to_vec();
459 if VERBOSE { println!("rules: {}", rules.iter().map(|v| Symbol::NT(*v).to_str(self.symbol_table.as_ref())).join(", ")); }
460 while change {
461 change = false;
462 for i in &rules {
463 let prule = &self.prules.as_ref().unwrap()[*i as usize];
464 let symbol = Symbol::NT(*i as VarId);
465 if VERBOSE { println!("- {} -> {}", symbol.to_str(self.symbol_table.as_ref()), prule_to_str(prule, self.symbol_table.as_ref())); }
466 let num_items = first[&symbol].len();
467 for alt in prule {
468 if VERBOSE { println!(" - {}", alt.to_str(self.symbol_table.as_ref())); }
469 assert!(!alt.is_empty(), "empty alternative for {}: {}",
470 symbol.to_str(self.symbol_table.as_ref()), alt.to_str(self.symbol_table.as_ref()));
471 if VERBOSE {
472 print!(" [0] {}", alt[0].to_str(self.symbol_table.as_ref()));
473 println!(", first = {}", first[&alt[0]].iter().map(|s| s.to_str(self.symbol_table.as_ref())).join(", "));
474 }
475 let new = alt.calc_alt_first(&first);
476 let _n = first.get(&symbol).unwrap().len();
477 first.get_mut(&symbol).unwrap().extend(new);
478 if VERBOSE && first.get(&symbol).unwrap().len() > _n {
479 println!(" first[{}] -> {}", symbol.to_str(self.get_symbol_table()),
480 first.get(&symbol).unwrap().iter().map(|s| s.to_str(self.get_symbol_table())).join(", "));
481 }
482 }
483 change |= first[&symbol].len() > num_items;
484 }
485 if VERBOSE && change { println!("---------------------------- again"); }
486 }
487 if self.num_t == 0 {
488 self.log.add_error("calc_first: no terminal in grammar".to_string());
489 }
490 first
491 }
492
493 pub fn calc_follow(&self, first: &HashMap<Symbol, HashSet<Symbol>>) -> HashMap<Symbol, HashSet<Symbol>> {
494 const VERBOSE: bool = false;
495 assert!(self.start.is_some(), "start NT symbol not defined");
496 if !self.log.has_no_errors() {
497 return HashMap::new();
498 }
499 let mut follow = first.iter()
500 .filter_map(|(s, _)| if matches!(s, Symbol::NT(_)) { Some((*s, HashSet::<Symbol>::new())) } else { None })
501 .collect::<HashMap<_, _>>();
502 follow.get_mut(&Symbol::NT(self.start.unwrap())).unwrap().insert(Symbol::End);
503 let rules = (0..self.num_nt as VarId).filter(|var| follow.contains_key(&Symbol::NT(*var))).to_vec();
504 let mut change = true;
505 while change {
506 change = false;
507 for i in &rules {
508 let prule = &self.prules.as_ref().unwrap()[*i as usize];
509 let symbol = Symbol::NT(*i as VarId);
510 if VERBOSE { println!("- {} -> {}", symbol.to_str(self.symbol_table.as_ref()), prule_to_str(prule, self.symbol_table.as_ref())); }
511 for alt in prule {
512 if VERBOSE { println!(" - {}", alt.to_str(self.symbol_table.as_ref())); }
513 let mut trail = follow.get(&symbol).unwrap().clone();
514 for sym_i in alt.iter().rev() {
515 if let Symbol::NT(_) = sym_i {
516 let num_items = follow.get(sym_i).unwrap().len();
517 follow.get_mut(sym_i).unwrap().extend(&trail);
518 if VERBOSE && follow.get(sym_i).unwrap().len() > num_items {
519 println!(" follow[{}] -> {}", sym_i.to_str(self.get_symbol_table()),
520 follow.get(sym_i).unwrap().iter().map(|s| s.to_str(self.get_symbol_table())).join(", "));
521 }
522 change |= follow.get(sym_i).unwrap().len() > num_items;
523 if first[sym_i].contains(&Symbol::Empty) {
524 trail.extend(first[sym_i].iter().filter(|s| *s != &Symbol::Empty));
525 } else {
526 trail.clear();
527 trail.extend(&first[sym_i]);
528 }
529 } else {
530 trail.clear();
531 trail.insert(*sym_i);
532 }
533 }
534 }
535 }
536 if VERBOSE && change { println!("---------------------------- again"); }
537 }
538 follow
539 }
540
541 pub(super) fn remove_recursion(&mut self) {
555 const MAX_DISTRIB_LEN: Option<usize> = None; const DONT_DISTRIB_IN_AMBIG: bool = true; const VERBOSE: bool = false;
561
562 self.log.add_note("removing left / binary recursion in grammar...");
563 self.check_num_nt_coherency();
564 if VERBOSE {
565 println!("ORIGINAL:");
566 self.print_rules(false, false);
567 }
568 let next_avail_var = self.get_next_available_var() as usize;
569 let mut var_new = next_avail_var;
570 let mut prules = self.prules.take().unwrap();
572 let mut ambig_alt_id = 0;
573 for var in 0..next_avail_var {
574 let prule = prules.get_mut(var).unwrap();
575 let var = var as VarId;
576 let symbol = Symbol::NT(var);
577 let var_name = symbol.to_str(self.get_symbol_table());
578 let mut extra_prods = Vec::<ProdRule>::new();
579 if prule.iter().any(|p| !p.is_empty() && (p.first().unwrap() == &symbol)) {
580 let orig_str = format!("{var_name} -> {}", prule_to_str(prule, self.get_symbol_table()));
581 self.log.add_note(format!("- modifying: {orig_str}"));
582 if VERBOSE { println!("processing: {orig_str}"); }
583
584 let (mut indep, mut alts) : (Vec<_>, Vec<_>) = take(prule).into_iter()
585 .partition(|alt| alt.first().unwrap() != &symbol && alt.last().unwrap() != &symbol);
586 for alt in indep.iter_mut() {
587 alt.retain(|s| !s.is_empty()); }
589 alts.reverse();
590 let mut prec_eq = alts.iter_mut()
591 .map(|f| {
592 let p = f.flags & ruleflag::PREC_EQ != 0;
593 f.flags &= !ruleflag::PREC_EQ;
594 p
595 }).to_vec();
596 prec_eq.pop();
597 prec_eq.insert(0, false);
598 if indep.is_empty() {
599 self.log.add_error(format!("recursive rules must have at least one independent alternative: {} -> {}",
600 Symbol::NT(var).to_str(self.get_symbol_table()),
601 prule_to_str(prule, self.get_symbol_table())));
602 prule.extend(alts);
603 continue;
605 }
606
607 let mut var_i = 0; let mut rule_var_i = 0; let mut var_alts: Vec<Vec<AltId>> = vec![vec![]]; let mut indep_alts = Vec::<AltId>::new();
614 let mut pr_info = Vec::<AltInfo>::new(); let mut has_ambig = false;
616
617 for (i, alt) in alts.iter().index::<AltId>() {
618 let ty = AltType::from(&symbol, alt);
619 has_ambig |= ty == AltType::LeftAssoc || ty == AltType::RightAssoc;
620 var_i = match ty {
621 AltType::Independant => panic!("there can't be an independent alternative in `alts`"),
622 AltType::LeftAssoc => if prec_eq[i as usize] { var_i } else { var_i + 1 },
623 AltType::Prefix
624 | AltType::RightAssoc => if var_i > rule_var_i || prec_eq[i as usize] || var_alts[var_i].is_empty() { var_i } else { var_i + 1 },
625 AltType::Suffix => var_i,
626 };
627 let alt_info = AltInfo {
628 pred_priority: if ty == AltType::Prefix { None } else { Some(i) },
629 ivar: var_i,
630 ty
631 };
632 pr_info.push(alt_info);
633 let top_maybe = match ty {
634 AltType::Independant => panic!("there can't be an independent alternative in `alts`"),
635 AltType::LeftAssoc => Some(var_i - 1), AltType::Prefix => None, AltType::RightAssoc
638 | AltType::Suffix => Some(var_i), };
640 if let Some(top) = top_maybe {
641 rule_var_i = top;
642 var_alts.resize(1 + top, vec![]);
643 (0..=top).for_each(|v| var_alts[v].push(i));
644 } else {
645 indep_alts.push(i);
646 }
647 let var_alt_str = format!(
648 "[{i:2}] {:15}: {:10} => var_i = {var_i:2}, rule_var_i = {rule_var_i:2}, {{{}}}",
649 alt.to_str(self.get_symbol_table()),
650 format!("{ty:?}"), var_alts.iter().enumerate().map(|(i, va)| format!("{i}: {}", va.iter().join(","))).join(" "));
652 self.log.add_note(format!(" - {var_alt_str}"));
653 if VERBOSE { println!("- {var_alt_str}"); }
654 };
655 assert!(var_i <= rule_var_i + 1, "var_i = {var_i}, rule_var_i = {rule_var_i}");
656
657 let need_indep = indep.len() + indep_alts.len() > 1
661 && (MAX_DISTRIB_LEN.map(|max| indep.len() + indep_alts.len() > max).unwrap_or(false) || has_ambig || rule_var_i < var_i)
662 || DONT_DISTRIB_IN_AMBIG && has_ambig;
663 let num_indep = if need_indep { 1 } else { 0 };
664 let mut var_i_nt = Vec::<(VarId, VarId)>::with_capacity(var_i + 1);
665 var_i_nt.push((var, var_new as VarId));
666 var_i_nt.extend((0..var_i).map(|i| ((var_new + i*2 + 1) as VarId, (var_new + i*2 + 2) as VarId)));
667 var_new += rule_var_i * 2 + 1 + num_indep;
668 if VERBOSE { println!("adding {} variables (w/o independent alternatives), need_indep = {need_indep}, var_new: {} -> {var_new}",
669 rule_var_i * 2 + 1 + num_indep, var_new - (rule_var_i * 2 + 1 + num_indep)); }
670 let nt_indep_maybe = if need_indep { Some(var_new as VarId - 1) } else { None };
671 if var_new > VarId::MAX as usize {
672 self.log.add_error(format!("too many nonterminals when expanding {var_name}: {var_new} > {}", VarId::MAX));
673 return;
674 }
675
676 let new_alts = alts.iter().zip(&pr_info).map(|(a, AltInfo { ivar, ty, .. })| {
679 let mut new_a: Alternative;
680 match ty {
681 AltType::LeftAssoc | AltType::RightAssoc => {
682 new_a = Alternative::new(a.v[1..a.len() - 1].to_owned());
683 if need_indep || *ivar <= rule_var_i {
684 new_a.v.push(Symbol::NT(var_i_nt[*ivar].0));
685 } else {
686 new_a.v.extend(&indep[0].v);
687 }
688 if ty == &AltType::RightAssoc {
689 new_a.flags = ruleflag::R_ASSOC;
690 }
691 }
692 AltType::Suffix => {
693 new_a = Alternative::new(a.v[1..a.len()].to_owned());
694 }
695 AltType::Prefix => {
696 new_a = Alternative::new(a.v[..a.len() - 1].to_owned());
697 new_a.v.push(Symbol::NT(var_i_nt[*ivar].0));
698 }
699 AltType::Independant => panic!("there can't be an independent alternative in `alts`"),
700 }
701 new_a.flags |= a.flags & ruleflag::ALTERNATIVE_INFO;
702 new_a.origin = a.origin;
703 new_a.ambig_alt_id = Some(ambig_alt_id);
704 ambig_alt_id += 1;
705 new_a
706 }).to_vec();
707 let mut used_sym = HashSet::<Symbol>::new();
708 let mut prod_indep = new_alts.iter()
709 .zip(&pr_info)
710 .filter_map(|(nf, AltInfo { ty, .. })| if ty == &AltType::Prefix { Some(nf.clone()) } else { None })
711 .to_vec();
712 let greedy_prologue = pr_info[0].ty == AltType::Prefix && need_indep || pr_info[0].ty == AltType::RightAssoc;
715 for (i, va) in var_alts.into_iter().enumerate() {
716 let (nt, nt_loop) = var_i_nt[i];
717 let prod_nt = if let Some(nt_indep) = nt_indep_maybe {
718 prule!(nt nt_indep, nt nt_loop)
719 } else {
720 prod_indep.iter().cloned()
722 .chain(indep.iter().map(|a| {
723 let mut new_a = a.clone();
724 new_a.push(sym!(nt nt_loop));
725 new_a
726 })).collect()
727 };
728 let mut new_used_sym = Vec::<Symbol>::new();
729 let mut prod_nt_loop = va.iter().enumerate().rev().map(|(_, &a_id)| {
730 let mut f = new_alts[a_id as usize].clone();
731 f.v.push(Symbol::NT(nt_loop));
732 let sym = f.first().unwrap();
733 let is_used_sym = used_sym.contains(sym);
734 if !is_used_sym {
735 new_used_sym.push(*sym);
736 }
737 if is_used_sym || (i == 0 && greedy_prologue) {
738 f.flags |= ruleflag::GREEDY;
739 }
740 if !has_ambig && pr_info[a_id as usize].ty == AltType::Suffix {
741 self.set_flags(nt, ruleflag::PARENT_L_RECURSION);
742 self.set_flags(nt_loop, ruleflag::CHILD_L_RECURSION);
743 }
744 f
745 }).to_vec();
746 used_sym.extend(new_used_sym);
747 prod_nt_loop.push(alt!(e));
748 if i == 0 {
749 *prule = prod_nt;
750 extra_prods.push(prod_nt_loop);
751 if let Some(t) = self.symbol_table.as_mut() {
752 assert_eq!(t.add_child_nonterminal(var), var_i_nt[0].1);
753 };
754 } else {
755 extra_prods.extend([prod_nt, prod_nt_loop]);
756 if let Some(t) = self.symbol_table.as_mut() {
757 assert_eq!(t.add_child_nonterminal(var), var_i_nt[i].0);
758 assert_eq!(t.add_child_nonterminal(var), var_i_nt[i].1);
759 };
760 }
761 if has_ambig {
762 self.set_flags(nt, ruleflag::PARENT_L_RECURSION);
763 self.set_flags(nt_loop, ruleflag::CHILD_L_RECURSION);
764 }
765 }
766 if need_indep {
767 if let Some(t) = self.symbol_table.as_mut() {
768 assert_eq!(t.add_child_nonterminal(var), nt_indep_maybe.unwrap());
769 }
770 }
771 if VERBOSE {
772 println!("new alternatives: {}", new_alts.iter().enumerate()
773 .filter_map(|(i, na)| if na.is_empty() { None } else { Some(format!("[{i}] {}", na.to_str(self.get_symbol_table()))) }).join(", "));
774 println!("vars: {}{}", var_i_nt.iter().take(rule_var_i + 1).enumerate().map(|(i, (v1, v2))|
775 format!("[{i}]:{},{}", Symbol::NT(*v1).to_str(self.get_symbol_table()),
776 Symbol::NT(*v2).to_str(self.get_symbol_table()))).join(" "),
777 if need_indep { format!(" [{}]:{}", var_i, Symbol::NT(var_i_nt[var_i].0).to_str(self.get_symbol_table() )) } else { String::new() });
778 }
779 if has_ambig {
780 self.set_flags(var, ruleflag::PARENT_AMBIGUITY);
781 } else if !prod_indep.is_empty() && !need_indep {
782 self.set_flags(var, ruleflag::R_RECURSION);
783 }
784 for (nt, nt_prime) in var_i_nt.into_iter().take(rule_var_i + 1) {
785 if nt != var {
786 self.set_parent(nt, var);
787 }
788 self.set_parent(nt_prime, nt);
789 }
790 if let Some(nt_indep) = nt_indep_maybe {
791 if !has_ambig && prod_indep.iter().any(|p| p.last() == Some(&Symbol::NT(nt_indep)))
792 || has_ambig && !prod_indep.is_empty()
793 {
794 self.set_flags(nt_indep, ruleflag::R_RECURSION);
795 }
796 for alt in &mut indep {
797 if alt.v.is_empty() {
798 alt.v.push(Symbol::Empty);
799 }
800 }
801 prod_indep.extend(indep);
802 self.set_parent(nt_indep, var);
803 extra_prods.push(prod_indep);
804 }
805 self.parent.resize(var_new, None);
806 self.flags.resize(var_new, 0);
807 self.log.add_note(format!(" => {var_name} -> {}", prule_to_str(prule, self.get_symbol_table())));
808 for (v, p) in extra_prods.iter().index_start(prules.len() as VarId) {
809 self.log.add_note(
810 format!(" {} -> {}", Symbol::NT(v).to_str(self.get_symbol_table()), prule_to_str(p, self.get_symbol_table())));
811 }
812 } else if prule.iter().any(|p| !p.is_empty() && p.last().unwrap() == &symbol) && self.get_flags(var) & ruleflag::CHILD_REPEAT == 0 {
813 self.set_flags(var, ruleflag::R_RECURSION);
814 }
815 prules.extend(extra_prods);
816 let incompatibility_flags = ruleflag::R_RECURSION | ruleflag::L_FORM | ruleflag::PARENT_L_RECURSION;
817 let incompatibility_str = "<L> right recursivity + left recursivity";
818 let flags = self.get_flags(var);
819 if flags & incompatibility_flags == incompatibility_flags {
820 let tree = &self.origin.trees[var as usize];
821 self.log.add_error(format!(
822 "`{var_name} -> {}` has incompatible rule alternatives: {incompatibility_str}",
823 grtree_to_str(tree, None, None, None, self.get_symbol_table(), false)));
824 }
825 }
826 if VERBOSE {
827 println!("#prules: {}, #flags: {}, #parents:{}", prules.len(), self.flags.len(), self.parent.len());
828 if let Some(ref mut table) = self.symbol_table {
829 println!("table: {} ({})", table.get_nonterminals().join(", "), table.get_num_nt());
830 }
831 }
832 self.num_nt = prules.len();
833 self.prules = Some(prules);
834 if VERBOSE {
835 println!("FINAL:");
836 self.print_rules(false, false);
837 println!("FLAGS:\n{}", self.flags.iter().index::<VarId>()
838 .map(|(v, f)| format!("- ({v:2}) {}: {}", Symbol::NT(v).to_str(self.get_symbol_table()), ruleflag::to_string(*f).join(", "))).join("\n"));
839 if let Some(table) = &self.symbol_table {
840 println!("Symbol table:\n-NT: {}\n-T: {}",
841 table.get_nonterminals().enumerate().map(|(i, nt)| format!("{i}:{nt}")).join(", "),
842 table.get_terminals().enumerate()
843 .map(|(i, (t, txt))| format!("{i}:{t}{}", if let Some(st) = txt { format!(" ({st})") } else { String::new() })).join(", "))
844 }
845 }
846 }
847
848 pub fn left_factorize(&mut self) {
862 fn similarity(a: &Alternative, b: &Alternative) -> usize {
863 a.iter().zip(b.iter()).take_while(|(a, b)| a == b).count()
864 }
865
866 const VERBOSE: bool = false;
867 self.log.add_note("removing left factorization...");
868 let mut new_var = self.get_next_available_var();
869 let mut prules = self.prules.take().unwrap();
871 let mut i = 0;
872 while i < prules.len() {
873 let prule = &mut prules[i];
874 let var_flags = self.flags[i];
875 let var = i as VarId;
876 i += 1;
877 if prule.len() < 2 {
878 continue
879 }
880 let mut alts = prule.clone();
881 let mut extra = Vec::<ProdRule>::new();
882 let mut changed = false;
883 alts.sort();
884 if VERBOSE { println!("{var}: {} -> {}", Symbol::NT(var).to_str(self.get_symbol_table()), alts.iter().map(|f| f.to_str(self.get_symbol_table())).join(" | ")); }
885 while alts.len() > 1 {
886 let simi = alts.windows(2).enumerate()
887 .map(|(j, x)| (j, similarity(&x[0], &x[1])))
888 .skip_while(|(_, s)| *s == 0)
889 .take_while(|(_, s)| *s != 0)
890 .to_vec();
891 if simi.is_empty() {
892 if VERBOSE { println!(" nothing to factorize"); }
893 break;
894 }
895 changed = true;
896 let min = simi.iter().map(|(_, s)| *s).min().unwrap();
897 let start = simi[0].0;
898 let stop = start + simi.len();
899 let mut factorized = Alternative::new(alts[start].v.iter().take(min).cloned().to_vec());
900 let mut child = alts.drain(start..=stop).to_vec();
901 if VERBOSE { println!("child:\n{}", child.iter().enumerate().map(|(i, c)| format!("- [{i}] = {} org = {:?}", c.to_str(self.get_symbol_table()), c.origin)).join("\n")); }
902 if child.iter().all(|f| f.is_greedy()) {
903 factorized.flags |= ruleflag::GREEDY;
904 }
905 for f in &mut child {
906 f.v.drain(0..min);
907 }
908 if child[0].v.is_empty() {
909 if var_flags & ruleflag::CHILD_REPEAT != 0 {
910 factorized.origin = child[0].origin;
911 }
912 child[0].v.push(Symbol::Empty);
913 let empty = child.remove(0);
914 child.push(empty);
915 }
916 let var_prime = new_var;
917 new_var += 1;
918 self.set_flags(var, ruleflag::PARENT_L_FACTOR);
919 self.set_flags(var_prime, ruleflag::CHILD_L_FACT);
920 let rep_l_form = ruleflag::CHILD_REPEAT | ruleflag::L_FORM;
921 let top = if var_flags & rep_l_form == rep_l_form { var } else { self.get_top_parent(var) };
922 if let Some(table) = self.symbol_table.as_mut() {
923 assert_eq!(table.add_child_nonterminal(top), var_prime);
924 };
925 self.set_parent(var_prime, var);
926 let symbol_prime = Symbol::NT(var_prime);
927 factorized.v.push(symbol_prime);
928 alts.insert(start, factorized);
929 if VERBOSE {
930 println!(" - similarity: {} => {}", simi.iter().map(|(j, s)| format!("{j}:{s}")).join(", "), min);
931 println!(" factorize: {}", child.iter().map(|a| a.to_str(self.get_symbol_table())).join(" | "));
932 println!(" left: {}", alts.iter().map(|a| a.to_str(self.get_symbol_table())).join(" | "));
933 }
934 extra.push(child);
935 }
936 if changed {
937 self.log.add_note(format!(
938 "- modifying: {} -> {}",
939 Symbol::NT(var).to_str(self.get_symbol_table()), prule_to_str(prule, self.get_symbol_table())));
940 self.log.add_note(format!(
941 " => {} -> {}",
942 Symbol::NT(var).to_str(self.get_symbol_table()), prule_to_str(&alts, self.get_symbol_table())));
943 *prule = alts;
944 let offset = prules.len() as VarId;
945 for (v, p) in extra.iter().index_start(offset) {
946 self.log.add_note(format!(
947 " {} -> {}",
948 Symbol::NT(v).to_str(self.get_symbol_table()), prule_to_str(p, self.get_symbol_table())));
949 }
950 prules.extend(extra);
951 }
952 }
953 self.num_nt = prules.len();
954 self.prules = Some(prules);
955 }
956
957 pub(crate) fn remove_ambiguity(&self) {
958 todo!()
959 }
960
961 fn transfer_alt_flags(&mut self) {
963 const FLAG_MASK: u32 = ruleflag::L_FORM;
965
966 for (v, prule) in self.prules.as_mut().unwrap().iter_mut().enumerate() {
967 let flags = prule.iter().fold(0, |acc, alt| acc | alt.flags) & FLAG_MASK;
968 self.flags[v] |= flags;
969 for alt in prule.iter_mut() {
970 alt.flags &= !FLAG_MASK;
971 }
972 }
973 }
974
975 fn check_flags(&mut self) {
976 const FLAG_CHECK_MASK: u32 = ruleflag::L_FORM | ruleflag::CHILD_REPEAT | ruleflag::R_RECURSION;
977 for v in 0..self.num_nt {
978 if self.flags[v] & FLAG_CHECK_MASK == ruleflag::L_FORM {
979 if self.flags[v] & ruleflag::CHILD_L_FACT == 0 || self.flags[self.parent[v].unwrap() as usize] & ruleflag::R_RECURSION == 0 {
981 self.log.add_error(format!("{} has an illegal flag L-Form (only used with +, *, or right recursion): {}",
982 Symbol::NT(v as VarId).to_str(self.get_symbol_table()),
983 ruleflag::to_string(self.flags[v]).join(" ")
984 ));
985 }
986 }
987 }
988 }
989
990 pub fn prs_alt_origins_str(&self, ansi: bool) -> Vec<String> {
995 let mut cols = vec![vec!["| ProdRuleSet".to_string(), "|".to_string(), "Original rules".to_string()]];
996 cols.extend(self.get_prules_iter()
998 .flat_map(|(v, prule)| prule.iter()
999 .map(move |alt| vec![
1000 format!("| {}", alt_to_rule_str(v, &alt.v, self.get_symbol_table())),
1001 "|".to_string(),
1002 if let Some((vo, ido)) = alt.origin {
1003 let tree = &self.origin.trees[vo as usize];
1004 let emphasis = if tree.get_root() == Some(ido) { None } else { Some(ido) };
1005 let orig_rule = grtree_to_str_custom(tree, None, emphasis, Some(vo), self.get_symbol_table(), false, ansi);
1006 format!("{} -> {orig_rule}", Symbol::NT(vo).to_str(self.get_symbol_table()))
1007 } else {
1008 String::new()
1009 }
1010 ])
1011 ));
1012 let n1 = cols.len();
1013 cols.extend((0..self.num_nt)
1015 .filter_map(|var|
1016 self.parent[var]
1017 .and_then(|_| self.origin.map
1018 .get(&(var as VarId))
1019 .map(|&(v, index)| (var as VarId, v, index))))
1020 .map(|(var, v, index)| vec![
1021 format!("| {}", Symbol::NT(var).to_str(self.get_symbol_table())),
1022 "|".to_string(),
1023 format!("{} -> {}",
1024 Symbol::NT(v).to_str(self.get_symbol_table()),
1025 grtree_to_str_custom(&self.origin.trees[v as usize], None, Some(index), None, self.get_symbol_table(), false, ansi))
1026
1027 ]));
1028 let mut lines = columns_to_str(cols, None);
1029 let max = lines.iter().map(|s| s.replace(STR_BEFORE_ANSI, "").replace(STR_AFTER_ANSI, "").charlen()).max().unwrap();
1030 let sep = format!("{:-<1$}", "", max);
1031 lines.push(sep.clone());
1032 lines.insert(n1, sep.clone());
1033 lines.insert(1, sep);
1034 lines
1035 }
1036}
1037
1038impl<T> LogReader for ProdRuleSet<T> {
1039 type Item = BufLog;
1040
1041 fn get_log(&self) -> &Self::Item {
1042 &self.log
1043 }
1044
1045 fn give_log(self) -> Self::Item {
1046 self.log
1047 }
1048}
1049
1050impl<T> HasBuildErrorSource for ProdRuleSet<T> {
1051 const SOURCE: BuildErrorSource = BuildErrorSource::ProdRuleSet;
1052}
1053
1054impl ProdRuleSet<General> {
1055 fn with_capacity(capacity: usize) -> Self {
1056 Self {
1057 prules: Some(Vec::with_capacity(capacity)),
1058 origin: Origin::new(),
1059 num_nt: 0,
1060 num_t: 0,
1061 symbol_table: None,
1062 flags: Vec::with_capacity(capacity),
1063 parent: Vec::with_capacity(capacity),
1064 start: None,
1065 name: None,
1066 nt_conversion: HashMap::new(),
1067 log: BufLog::new(),
1068 disable_warning_unused_nt_t: false,
1069 _phantom: PhantomData
1070 }
1071 }
1072}
1073
1074impl ProdRuleSet<LL1> {
1075 pub(super) fn calc_table(&mut self, first: &HashMap<Symbol, HashSet<Symbol>>, follow: &HashMap<Symbol, HashSet<Symbol>>, error_recovery: bool)
1084 -> LLParsingTable
1085 {
1086 fn add_table(table: &mut [Vec<AltId>], num_t: usize, nt_id: VarId, t_id: VarId, a_id: AltId) {
1087 let pos = nt_id as usize * num_t + t_id as usize;
1088 table[pos].push(a_id);
1089 }
1090 const VERBOSE: bool = false;
1091 const DISABLE_FILTER: bool = false;
1092 if !self.log.has_no_errors() {
1093 return LLParsingTable::new();
1094 }
1095 if VERBOSE {
1096 fn p_hash_hash(title: &str, tbl: Option<&SymbolTable>, table: &HashMap<Symbol, HashSet<Symbol>>) {
1097 let mut syms = table.iter().map(|(s, hs)| {
1098 let mut f = hs.iter().cloned().to_vec();
1099 f.sort();
1100 (*s, f)
1101 }).to_vec();
1102 syms.sort();
1103 println!("{title}\n{}",
1104 syms.into_iter().map(|(s, f)| format!("- {} -> {}", s.to_str(tbl), f.into_iter().map(|s2| s2.to_str(tbl)).join(", "))).join("\n"));
1105 }
1106 p_hash_hash("first:", self.get_symbol_table(), first);
1107 p_hash_hash("follow:", self.get_symbol_table(), follow);
1108 }
1109 let mut alts = self.prules.as_ref().unwrap().iter().index().filter(|(v, _)| DISABLE_FILTER || first.contains_key(&Symbol::NT(*v)))
1110 .flat_map(|(v, x)| x.iter().map(move |a| (v, a.clone()))).to_vec();
1111 let error_skip = alts.len() as AltId; let error_pop = error_skip + 1; let num_nt = self.num_nt;
1114 let num_t = self.num_t + 1;
1115 let end = (num_t - 1) as VarId; let mut used_t = HashSet::<Symbol>::new();
1117 let mut table: Vec<Vec<AltId>> = vec![vec![]; num_nt * num_t];
1118 for (a_id, (nt_id, alt)) in alts.iter().index() {
1119 used_t.extend(alt.iter().filter(|s| s.is_t()));
1120 if VERBOSE { println!("- {a_id}: {} -> {} => {}", Symbol::NT(*nt_id).to_str(self.get_symbol_table()),
1121 alt.to_str(self.get_symbol_table()),
1122 alt.calc_alt_first(first).iter().map(|s| s.to_str(self.get_symbol_table())).join(" ")); }
1123 let mut has_end = false;
1124 let mut has_empty = false;
1125 for s in alt.calc_alt_first(first) {
1126 match s {
1127 Symbol::Empty => {
1128 has_empty = true;
1129 for s in &follow[&Symbol::NT(*nt_id)] {
1130 match s {
1131 Symbol::T(t_id) => add_table(&mut table, num_t, *nt_id, *t_id, a_id),
1132 Symbol::End => add_table(&mut table, num_t, *nt_id, end, a_id),
1133 _ => {}
1134 }
1135 }
1136 }
1137 Symbol::T(t_id) => {
1138 add_table(&mut table, num_t, *nt_id, t_id, a_id);
1139 }
1140 Symbol::NT(_) => {}
1141 Symbol::End => {
1142 has_end = true;
1143 }
1144 }
1145 }
1146 if has_empty && has_end {
1147 add_table(&mut table, num_t, *nt_id, end, end);
1148 }
1149 }
1150 let mut final_table = Vec::<AltId>::new();
1152 for nt_id in 0..num_nt {
1153 for t_id in 0..num_t {
1154 let pos = nt_id * num_t + t_id;
1155 final_table.push(match table[pos].len() {
1156 0 => {
1157 if error_recovery {
1158 let sym_t = if t_id < num_t - 1 { Symbol::T(t_id as TokenId) } else { Symbol::End };
1159 let sym_nt = Symbol::NT(nt_id as VarId);
1160 if follow[&sym_nt].contains(&sym_t) || first[&sym_nt].contains(&sym_t) {
1161 error_pop
1162 } else {
1163 error_skip
1164 }
1165 } else {
1166 error_skip
1167 }
1168 },
1169 1 => *table[pos].first().unwrap(),
1170 _ => {
1171 let greedies = table[pos].iter().filter(|&a_id| alts[*a_id as usize].1.is_greedy()).cloned().to_vec();
1173 if greedies.len() == 1 {
1174 let chosen = greedies[0];
1175 self.log.add_note(
1176 format!(" - calc_table: expected ambiguity for NT '{}', T '{}': {} => <{}> is specified as greedy and has been chosen",
1177 Symbol::NT(nt_id as VarId).to_str(self.get_symbol_table()),
1178 if t_id < self.num_t { Symbol::T(t_id as VarId).to_str(self.get_symbol_table()) } else { "<EOF>".to_string() },
1179 table[pos].iter().map(|a_id|
1180 format!("<{}>", alts[*a_id as usize].1.to_str(self.get_symbol_table()))).join(" or "),
1181 alts[chosen as usize].1.to_str(self.get_symbol_table())
1182 ));
1183 table[pos] = greedies;
1184 chosen
1185 } else {
1186 let row = (0..num_t).filter(|j| *j != t_id).flat_map(|j| &table[nt_id * num_t + j]).collect::<HashSet<_>>();
1187 let chosen = *table[pos].iter().find(|a| !row.contains(a)).unwrap_or(&table[pos][0]);
1188 self.log.add_warning(
1189 format!("- calc_table: ambiguity for NT '{}', T '{}': {} => <{}> has been chosen",
1190 Symbol::NT(nt_id as VarId).to_str(self.get_symbol_table()),
1191 if t_id < self.num_t { Symbol::T(t_id as VarId).to_str(self.get_symbol_table()) } else { "<EOF>".to_string() },
1192 table[pos].iter().map(|a_id|
1193 format!("<{}>", alts[*a_id as usize].1.to_str(self.get_symbol_table()))).join(" or "),
1194 alts[chosen as usize].1.to_str(self.get_symbol_table())
1195 ));
1196 table[pos] = vec![chosen];
1197 chosen
1198 }
1199 }
1200 });
1201 }
1202 }
1203 if !(0..num_t - 1).any(|t_id| (0..num_nt).any(|nt_id| final_table[nt_id * num_t + t_id] < error_skip)) {
1204 self.log.add_error("- calc_table: no terminal used in the table".to_string());
1205 }
1206 for (_, a) in &mut alts {
1207 a.flags &= !ruleflag::GREEDY;
1208 }
1209 let table = LLParsingTable { num_nt, num_t, alts, table: final_table, flags: self.flags.clone(), parent: self.parent.clone() };
1210 self.log.add_info("parsing table:");
1211 self.log.extend_messages(
1212 table.to_str(self.get_symbol_table()).into_iter()
1213 .map(LogMsg::Info)
1214 );
1215 table
1216 }
1217
1218 pub fn make_parsing_table(&mut self, error_recovery: bool) -> LLParsingTable {
1219 self.log.add_note("- calculating parsing table...");
1220 let first = self.calc_first();
1221 let follow = self.calc_follow(&first);
1222 self.calc_table(&first, &follow, error_recovery)
1223 }
1224
1225 pub fn gen_tables_source_code(&self, indent: usize) -> String {
1226 let st = self.symbol_table.as_ref().unwrap();
1227 let mut source = Vec::<String>::new();
1228 source.push(format!("static ORIGIN: [(Option<usize>, &[(GrNode, &[usize])]); {}] = [", self.origin.trees.len()));
1230 for t in &self.origin.trees {
1231 let tree_str = (0..t.len())
1232 .map(|i| format!("({}, &[{}])", t.get(i).gen_source_code(), t.children(i).iter().join(",")))
1233 .join(", ");
1234 source.push(format!(" ({:?}, &[{}]),", t.get_root(), tree_str));
1235 }
1236 source.push("];".to_string());
1237 source.push(format!("static MAP: [(VarId, (VarId, usize)); {}] = [", self.origin.map.len()));
1238 let mut sorted_map = self.origin.map.iter().to_vec();
1239 sorted_map.sort(); source.extend(sorted_map.chunks(5)
1241 .map(|chk| format!(" {},", chk.iter().map(|(a, (c, d))| format!("({a}, ({c}, {d}))")).join(", "))));
1242 source.push("];".to_string());
1243 source.push("let origin = Origin::from_data(".to_string());
1244 source.push(" ORIGIN.into_iter().map(|(root, nodes)| GrTree::from((root, nodes.to_vec()))).collect(),".to_string());
1245 source.push(" HashMap::from(MAP));".to_string());
1246 source.push(String::new());
1248 source.push("let ll1_tables = ProdRuleSetTables::new(".to_string());
1249 source.push(format!(" {:?},", self.name));
1250 source.push(" vec![".to_string());
1251 source.extend(self.prules.as_ref().unwrap().iter().map(|prule| format!(" {},", prule_to_macro(prule))));
1252 source.push(" ],".to_string());
1253 source.push(" origin,".to_string());
1254 source.push(format!(" vec![{}],", st.get_terminals().map(|x| format!("{x:?}")).join(", ")));
1255 source.push(format!(" vec![{}],", st.get_nonterminals().map(|x| format!("{x:?}")).join(", ")));
1256 source.push(format!(" vec![{}],", self.flags.iter().join(", ")));
1257 source.push(format!(" vec![{}],", self.parent.iter().map(|p_maybe| format!("{p_maybe:?}")).join(", ")));
1258 source.push(format!(" {:?},", self.start));
1259 source.push(format!(" {},", self.disable_warning_unused_nt_t));
1260 source.push(format!(" hashmap![{}]", self.nt_conversion.iter().map(|(v, conv)| format!("{v} => {conv:?}")).join(", ")));
1261 source.push(");".to_string());
1262 indent_source(vec![source], indent)
1263 }
1264}
1265
1266pub struct ProdRuleSetTables {
1269 name: Option<String>,
1270 prules: Vec<ProdRule>,
1271 origin: Origin<VarId, FromPRS>,
1272 t: Vec<(String, Option<String>)>, nt: Vec<String>, flags: Vec<u32>,
1275 parent: Vec<Option<VarId>>,
1276 start: Option<VarId>,
1277 disable_warning_unused_nt_t: bool,
1278 nt_conversion: HashMap<VarId, NTConversion>,
1279}
1280
1281impl ProdRuleSetTables {
1282 pub fn new<T: Into<String>>(
1283 name: Option<T>,
1284 prules: Vec<ProdRule>,
1285 origin: Origin<VarId, FromPRS>,
1286 t: Vec<(T, Option<T>)>,
1287 nt: Vec<T>,
1288 flags: Vec<u32>,
1289 parent: Vec<Option<VarId>>,
1290 start: Option<VarId>,
1291 disable_warning_unused_nt_t: bool,
1292 nt_conversion: HashMap<VarId, NTConversion>,
1293 ) -> Self {
1294 let t = t.into_iter().map(|(t, t_maybe)| (t.into(), t_maybe.map(|t| t.into()))).collect();
1295 let nt = nt.into_iter().map(|nt| nt.into()).collect();
1296 ProdRuleSetTables {
1297 name: name.map(|s| s.into()),
1298 prules,
1299 origin, t, nt, flags, parent, start,
1300 disable_warning_unused_nt_t, nt_conversion,
1301 }
1302 }
1303
1304 pub fn get_name(&self) -> Option<&String> {
1305 self.name.as_ref()
1306 }
1307}
1308
1309impl BuildFrom<ProdRuleSetTables> for ProdRuleSet<LL1> {
1310 fn build_from(source: ProdRuleSetTables) -> Self {
1311 let mut symbol_table = SymbolTable::new();
1312 symbol_table.extend_terminals(source.t);
1313 symbol_table.extend_nonterminals(source.nt);
1314 ProdRuleSet {
1315 prules: Some(source.prules),
1316 origin: source.origin,
1317 num_nt: symbol_table.get_num_nt(),
1318 num_t: symbol_table.get_num_t(),
1319 symbol_table: Some(symbol_table),
1320 flags: source.flags,
1321 parent: source.parent,
1322 start: source.start,
1323 name: source.name,
1324 nt_conversion: source.nt_conversion,
1325 log: BufLog::new(),
1326 disable_warning_unused_nt_t: source.disable_warning_unused_nt_t,
1327 _phantom: PhantomData,
1328 }
1329 }
1330}
1331
1332impl BuildFrom<RuleTreeSet<Normalized>> for ProdRuleSet<General> {
1335 fn build_from(mut rules: RuleTreeSet<Normalized>) -> Self {
1340 fn children_to_vec(tree: &GrTree, parent_id: usize) -> Alternative {
1341 let mut flags: u32 = 0;
1342 let alt = tree.children(parent_id).iter()
1343 .map(|id| tree.get(*id))
1344 .filter(|node| {
1345 match **node {
1346 GrNode::RAssoc => {
1347 flags |= ruleflag::R_ASSOC;
1348 false
1349 }
1350 GrNode::LForm(_) => {
1351 flags |= ruleflag::L_FORM;
1352 false
1353 }
1354 GrNode::PrecEq => {
1355 flags |= ruleflag::PREC_EQ;
1356 false
1357 }
1358 GrNode::Greedy => {
1359 flags |= ruleflag::GREEDY;
1360 false
1361 }
1362 _ => true,
1363 }
1364 })
1365 .map(|node| {
1366 match node {
1367 GrNode::Symbol(s) => *s,
1368 x => panic!("unexpected symbol {x} under &")
1369 }
1370 }).to_vec();
1371 Alternative::new(alt).with_flags(flags)
1372 }
1373 let mut prules = Self::with_capacity(rules.trees.len());
1374 prules.start = rules.start;
1375 prules.symbol_table = rules.symbol_table;
1376 prules.flags = rules.flags;
1377 prules.parent = rules.parent;
1378 prules.nt_conversion = rules.nt_conversion;
1379 prules.log = rules.log;
1380 if !prules.log.has_no_errors() {
1381 return prules;
1385 }
1386 prules.origin = Origin::<VarId, FromPRS>::from_trees_mut(&mut rules.origin.trees);
1387 for (var, tree) in rules.trees.iter().index() {
1388 if !tree.is_empty() {
1389 let root = tree.get_root().expect("tree {var} has no root");
1390 let root_sym = tree.get(root);
1391 let mut prule = match root_sym {
1392 GrNode::Symbol(s) => {
1393 let mut alt = Alternative::new(vec![*s]);
1394 if let Some(&(v, ch)) = rules.origin.map.get(&(var, root)) {
1395 alt.origin = Some((v, ch));
1396 }
1397 vec![alt]
1398 },
1399 GrNode::Concat => {
1400 let mut alt = children_to_vec(tree, root);
1401 if let Some(&(v, ch)) = rules.origin.map.get(&(var, root)) {
1402 alt.origin = Some((v, ch));
1403 }
1404 vec![alt]
1405 },
1406 GrNode::Or => tree.children(root).iter()
1407 .map(|id| {
1408 let child = tree.get(*id);
1409 let mut alt = if let GrNode::Symbol(s) = child {
1410 Alternative::new(vec![*s])
1411 } else {
1412 assert_eq!(*child, GrNode::Concat, "unexpected symbol {child} under |");
1413 children_to_vec(tree, *id)
1414 };
1415 if let Some(&(v, ch)) = rules.origin.map.get(&(var, *id)) {
1416 alt.origin = Some((v, ch));
1417 }
1418 alt
1419 }).to_vec(),
1420 s => panic!("unexpected symbol {s} as root of normalized GrTree for NT {}", Symbol::NT(var).to_str(prules.get_symbol_table()))
1421 };
1422 if prule.iter().any(|f| f.flags & ruleflag::L_FORM != 0) {
1423 prules.set_flags(var, ruleflag::L_FORM);
1436 for a in prule.iter_mut() {
1438 a.flags &= !ruleflag::L_FORM;
1439 }
1440 }
1441 if prules.parent[var as usize].is_some() {
1444 if let Some(&(v, index)) = rules.origin.map.get(&(var, root)) {
1445 prules.origin.add(var, (v, index));
1446 }
1447 }
1448 prules.prules.as_mut().unwrap().push(prule);
1449 } else {
1450 prules.prules.as_mut().unwrap().push(ProdRule::new()); }
1452 }
1453 prules.calc_num_symbols();
1454 prules
1455 }
1456}
1457
1458impl BuildFrom<RuleTreeSet<General>> for ProdRuleSet<General> {
1459 fn build_from(rules: RuleTreeSet<General>) -> Self {
1464 let rts = RuleTreeSet::<Normalized>::build_from(rules);
1465 let mut prs = ProdRuleSet::build_from(rts);
1466 if prs.log.has_no_errors() {
1467 prs.simplify();
1468 }
1469 prs
1470 }
1471}
1472
1473impl BuildFrom<ProdRuleSet<General>> for ProdRuleSet<LL1> {
1474 fn build_from(mut rules: ProdRuleSet<General>) -> Self {
1475 if rules.log.has_no_errors() {
1476 rules.remove_recursion();
1477 rules.left_factorize();
1478 rules.transfer_alt_flags();
1479 rules.check_flags();
1480 rules.log.add_note("final rule set:");
1481 rules.log.extend_messages(
1482 rules.prs_alt_origins_str(false).into_iter().map(LogMsg::Note)
1483 );
1484 }
1485 ProdRuleSet::<LL1> {
1486 prules: rules.prules,
1487 origin: rules.origin,
1488 num_nt: rules.num_nt,
1489 num_t: rules.num_t,
1490 symbol_table: rules.symbol_table,
1491 flags: rules.flags,
1492 parent: rules.parent,
1493 start: rules.start,
1494 name: rules.name,
1495 nt_conversion: rules.nt_conversion,
1496 log: rules.log,
1497 disable_warning_unused_nt_t: rules.disable_warning_unused_nt_t,
1498 _phantom: PhantomData,
1499 }
1500 }
1501}
1502
1503impl BuildFrom<ProdRuleSet<General>> for ProdRuleSet<LR> {
1504 fn build_from(mut rules: ProdRuleSet<General>) -> Self {
1505 if rules.log.has_no_errors() {
1506 rules.remove_ambiguity();
1507 rules.transfer_alt_flags();
1508 rules.check_flags();
1509 }
1510 ProdRuleSet::<LR> {
1511 prules: rules.prules,
1512 origin: rules.origin,
1513 num_nt: rules.num_nt,
1514 num_t: rules.num_t,
1515 symbol_table: rules.symbol_table,
1516 flags: rules.flags,
1517 parent: rules.parent,
1518 start: rules.start,
1519 name: rules.name,
1520 nt_conversion: rules.nt_conversion,
1521 log: rules.log,
1522 disable_warning_unused_nt_t: rules.disable_warning_unused_nt_t,
1523 _phantom: PhantomData,
1524 }
1525 }
1526}
1527
1528impl<T> ProdRuleSet<T> {
1532 pub fn print_rules(&self, as_comment: bool, filter_empty_nt: bool) {
1533 let prefix = if as_comment { " // " } else { " " };
1534 println!("{prefix}{}",
1535 self.get_prules_iter()
1536 .filter(|(_, rule)| !filter_empty_nt || **rule != prule!(e))
1537 .map(|(var, p)|
1538 format!("({var}) {} -> {}",
1539 Symbol::NT(var).to_str(self.get_symbol_table()),
1540 prule_to_str(p, self.get_symbol_table())))
1541 .join(&format!("\n{prefix}")));
1542 }
1543
1544 pub fn print_alts(&self) {
1545 println!("Alternatives:\n{}",
1546 self.get_alts().enumerate().map(|(id, (v, a))|
1547 format!(" // - {id}: {} -> {}{}",
1548 Symbol::NT(v).to_str(self.get_symbol_table()),
1549 a.iter().map(|s| s.to_str(self.get_symbol_table())).join(" "),
1550 if a.flags != 0 { format!(" {} ({})", ruleflag::to_string(a.flags).join(" | "), a.flags) } else { "".to_string() }
1551 )
1552 ).join("\n"));
1553 }
1554}
1555
1556impl LLParsingTable {
1557 pub fn to_str(&self, symbol_table: Option<&SymbolTable>) -> Vec<String> {
1558 let LLParsingTable { num_nt, num_t, alts, table, .. } = self;
1559 let error_skip = alts.len() as AltId;
1560 let error_pop = error_skip + 1;
1561 let str_nt = (0..*num_nt).map(|i| Symbol::NT(i as VarId).to_str(symbol_table)).to_vec();
1562 let max_nt_len = str_nt.iter().map(|s| s.len()).max().unwrap();
1563 let str_t = (0..*num_t).map(|j| if j + 1 < *num_t { Symbol::T(j as VarId).to_str_quote(symbol_table) } else { "$".to_string() }).to_vec();
1564 let t_len = str_t.iter().map(|s| s.len().max(3)).to_vec();
1565 let mut lines = vec![];
1566 lines.push(format!("{:<w$} | {}", "", (0..*num_t).map(|j| format!("{:^w$}", str_t[j], w = t_len[j])).join(" "), w = max_nt_len));
1567 lines.push(format!("{:-<w$}-+-{:-<t$}", "", "", w = max_nt_len, t = *num_t + t_len.iter().sum::<usize>()));
1568 for i in 0..*num_nt {
1569 let mut line = format!("{:<w$} |", str_nt[i], w = max_nt_len);
1570 for j in 0..*num_t {
1571 let value = table[i * num_t + j];
1572 if value < error_skip {
1573 line.push_str(&format!(" {:^w$}", value, w = t_len[j]));
1574 } else {
1575 line.push_str(&format!(" {:^w$}", if value == error_pop { "p" } else { "." }, w = t_len[j]));
1576 }
1577 }
1578 lines.push(line);
1579 }
1580 lines
1581 }
1582
1583 pub fn print(&self, symbol_table: Option<&SymbolTable>, indent: usize) {
1584 let lines = self.to_str(symbol_table);
1585 for line in lines {
1586 println!("{:<1$}// {line}", "", indent);
1587 }
1588 }
1589}