1use super::*;
4use crate::{columns_to_str, AltId, SymbolTable, VarId};
5use lexigram_core::alt::{alt_to_rule_str, Alternative};
6use lexigram_core::{CharLen, CollectJoin};
7use lexigram_core::log::LogMsg;
8
9pub type ProdRule = Vec<Alternative>;
21
22pub fn prule_to_str(prule: &ProdRule, symbol_table: Option<&SymbolTable>) -> String {
23 prule.iter().map(|alt| alt.to_str(symbol_table)).join(" | ")
24}
25
26pub fn prule_to_rule_str(nt: VarId, prule: &ProdRule, symbol_table: Option<&SymbolTable>) -> String {
27 format!("{} -> {}", Symbol::NT(nt).to_str(symbol_table), prule.iter().map(|alt| alt.to_str(symbol_table)).join(" | "))
28
29}
30
31pub fn prule_to_macro(prule: &ProdRule) -> String {
32 format!("prule!({})", prule.iter().map(|alt| alt.to_macro_item()).join("; "))
33}
34
35#[derive(Clone, Copy, PartialEq, Debug)]
36enum AltType { Independant, LeftAssoc, Prefix, RightAssoc, Suffix }
37
38impl AltType {
39 fn from(nt: &Symbol, alt: &Alternative) -> Self {
40 let left = matches!(alt.first(), Some(var) if var == nt);
41 let right = alt.len() > 1 && matches!(alt.last(), Some(var) if var == nt);
42 match (left, right) {
43 (false, false) => AltType::Independant,
44 (false, true) => AltType::Prefix,
45 (true, false) => AltType::Suffix,
46 (true, true) => if alt.flags & ruleflag::R_ASSOC != 0 { AltType::RightAssoc } else { AltType::LeftAssoc },
47 }
48 }
49}
50
51#[derive(Debug, Clone)]
52struct AltInfo {
53 #[allow(unused)]
54 pred_priority: Option<AltId>,
55 ivar: usize,
56 ty: AltType
57}
58
59#[derive(Debug)]
60pub struct LLParsingTable {
61 pub num_nt: usize,
62 pub num_t: usize, pub alts: Vec<(VarId, Alternative)>,
64 pub table: Vec<AltId>,
65 pub flags: Vec<u32>, pub parent: Vec<Option<VarId>>, }
68
69impl LLParsingTable {
70 pub fn new() -> Self {
71 LLParsingTable { num_nt: 0, num_t: 0, alts: vec![], table: vec![], flags: vec![], parent: vec![] }
72 }
73
74 pub fn get_top_parent(&self, nt: VarId) -> VarId {
75 let mut var = nt;
76 while let Some(parent) = self.parent[var as usize] {
77 var = parent;
78 }
79 var
80 }
81}
82
83impl Default for LLParsingTable {
84 fn default() -> Self {
85 Self::new()
86 }
87}
88
89#[derive(Clone, Debug)]
90pub struct ProdRuleSetOptions {
91 pub ansi: bool,
92 pub disable_warning_unused_nt_t: bool,
93}
94
95impl Default for ProdRuleSetOptions {
96 fn default() -> Self {
97 ProdRuleSetOptions {
98 ansi: true,
99 disable_warning_unused_nt_t: false,
100 }
101 }
102}
103
104#[derive(Clone, Debug)]
105pub struct ProdRuleSet<T> {
106 pub(super) prules: Option<Vec<ProdRule>>,
107 pub(crate) origin: Origin<VarId, FromPRS>,
108 pub(super) num_nt: usize,
109 pub(super) num_t: usize,
110 pub(crate) symbol_table: Option<SymbolTable>,
111 pub(super) flags: Vec<u32>,
112 pub(super) parent: Vec<Option<VarId>>,
113 pub(super) start: Option<VarId>,
114 pub(crate) name: Option<String>,
115 pub(crate) nt_conversion: HashMap<VarId, NTConversion>,
116 pub(crate) log: BufLog,
117 pub(crate) options: ProdRuleSetOptions,
118 pub(super) _phantom: PhantomData<T>
119}
120
121impl<T> ProdRuleSet<T> {
122 pub fn set_options(&mut self, options: ProdRuleSetOptions) {
123 self.options = options;
124 }
125
126 pub fn get_start(&self) -> Option<VarId> {
128 self.start
129 }
130
131 pub fn set_start(&mut self, start: VarId) {
133 self.start = Some(start);
134 }
135
136 pub fn set_disable_warning_unused_nt_t(&mut self, flag: bool) {
139 self.options.disable_warning_unused_nt_t = flag;
140 }
141
142 pub fn get_name(&self) -> Option<&String> {
143 self.name.as_ref()
144 }
145
146 pub fn set_name(&mut self, name: Option<String>) {
147 self.name = name;
148 }
149
150 pub fn get_next_available_var(&self) -> VarId {
152 self.prules.as_ref().unwrap().len() as VarId }
154
155 pub fn get_prules_iter(&self) -> impl Iterator<Item=(VarId, &ProdRule)> {
157 self.prules.as_ref().unwrap().iter().index().filter_map(|(id, p)| if p.is_empty() { None } else { Some((id, p)) })
158 }
159
160 pub fn get_prules_iter_mut(&mut self) -> impl Iterator<Item=(VarId, &mut ProdRule)> {
161 self.prules.as_mut().unwrap().iter_mut().enumerate().filter_map(|(id, p)| if p.is_empty() { None } else { Some((id as VarId, p)) })
162 }
163
164 pub fn get_alts(&self) -> impl Iterator<Item=(VarId, &Alternative)> {
165 self.prules.as_ref().unwrap().iter().enumerate()
166 .flat_map(|(v, p)| p.iter().map(move |alt| (v as VarId, alt)))
167 }
168
169 pub fn set_symbol_table(&mut self, symbol_table: SymbolTable) {
170 self.symbol_table = Some(symbol_table);
171 }
172
173 pub fn get_symbol_table(&self) -> Option<&SymbolTable> {
174 self.symbol_table.as_ref()
175 }
176
177 pub fn get_num_nt(&self) -> usize {
178 self.num_nt
179 }
180
181 pub fn get_num_t(&self) -> usize {
182 self.num_t
183 }
184
185 pub fn set_flags(&mut self, nt: VarId, new_flags: u32) {
188 let nt = nt as usize;
189 if nt >= self.flags.len() {
190 self.flags.resize(nt + 1, 0);
191 }
192 self.flags[nt] |= new_flags;
193 }
194
195 pub fn get_flags(&self, nt: VarId) -> u32 {
196 let nt = nt as usize;
197 if nt < self.flags.len() {
198 self.flags[nt]
199 } else {
200 0
201 }
202 }
203
204 pub(super) fn set_parent(&mut self, child: VarId, parent: VarId) {
205 let child = child as usize;
206 if child >= self.parent.len() {
207 self.parent.resize(child + 1, None);
208 }
209 self.parent[child] = Some(parent);
210 }
211
212 #[allow(unused)]
213 #[cfg(test)] pub(super) fn get_parent(&self, child: VarId) -> Option<VarId> {
215 let child = child as usize;
216 if child >= self.parent.len() {
217 None
218 } else {
219 self.parent[child]
220 }
221 }
222
223 fn get_top_parent(&self, nt: VarId) -> VarId {
224 let mut var = nt;
225 while let Some(parent) = self.parent[var as usize] {
226 var = parent;
227 }
228 var
229 }
230
231 pub(super) fn calc_num_symbols(&mut self) {
238 self.num_nt = self.prules.as_ref().unwrap().len();
239 self.num_t = self.prules.as_ref().unwrap().iter().map(|p|
240 p.iter().map(|f|
241 f.iter().filter_map(|s|
242 if let Symbol::T(v) = s { Some(*v + 1) } else { None }
243 ).max().unwrap_or(0)
244 ).max().unwrap_or(0)
245 ).max().unwrap_or(0) as usize;
246 if let Some(st) = self.symbol_table.as_mut() {
247 st.downsize_num_t(self.num_t)
248 }
249 self.flags.resize(self.num_nt, 0);
250 self.parent.resize(self.num_nt, None);
251 }
252
253 pub(super) fn simplify(&mut self) {
255 for p in self.prules.as_mut().unwrap() {
256 let mut has_empty = false;
257 let mut i = 0;
258 while i < p.len() {
259 let alt = p.get_mut(i).unwrap();
260 let mut j = 0;
261 while j < alt.len() {
262 if alt[j].is_empty() && (j > 0 || j + 1 < alt.len()) {
263 alt.v.remove(j);
264 } else {
265 j += 1;
266 }
267 }
268 if alt.len() == 1 && alt[0].is_empty() {
269 if has_empty {
270 p.remove(i);
271 } else {
272 has_empty = true;
273 i += 1;
274 }
275 } else {
276 i += 1;
277 }
278 }
279 }
280 }
281
282 fn check_num_nt_coherency(&mut self) {
283 if let Some(n) = self.symbol_table.as_ref().map(|table| table.get_num_nt()) {
284 let num_nt = self.prules.as_ref().unwrap().len();
285 if n != num_nt {
286 self.log.add_error(format!("there are {num_nt} rules but the symbol table has {n} nonterminal symbols: dropping the table"));
287 self.symbol_table = None;
288 }
289 }
290 }
291
292 fn cleanup_symbols(&mut self, keep: &mut HashSet<Symbol>) {
297 const VERBOSE: bool = false;
298 let mut nt_content = (0..self.num_nt).map(|v| if self.parent[v].is_none() { Some(v as VarId) } else { None }).to_vec();
299 for (from, cnv) in self.nt_conversion.iter() {
300 let MovedTo(to) = cnv else { panic!("{cnv:?} unexpected") };
302 nt_content[*from as usize] = None;
303 nt_content[*to as usize] = Some(*from);
304 }
305 if VERBOSE {
306 println!("Removing unused nonterminals:");
307 let mut all_h = self.prules.as_ref().unwrap().iter().flat_map(|p| p.iter().flat_map(|x| &x.v)).cloned().collect::<HashSet<_>>();
308 all_h.extend((0..self.num_nt).map(|i| Symbol::NT(i as VarId)));
309 let mut all = all_h.into_iter().collect::<Vec<_>>();
310 all.sort();
311 println!(" current NT symbols: {}", all.iter().filter_map(|s|
312 if let Symbol::NT(_) = s { Some(s.to_str(self.get_symbol_table())) } else { None }).join(", "));
313 println!(" current T symbols: {}", all.iter().filter_map(|s|
314 if let Symbol::T(_) = s { Some(s.to_str(self.get_symbol_table())) } else { None }).join(" "));
315 let mut used = keep.iter().collect::<Vec<_>>();
316 used.sort();
317 println!(" used NT symbols: {}", used.iter().filter_map(|s| if let Symbol::NT(v) = s { Some((v, s)) } else { None })
318 .enumerate().map(|(new_id, (id, s))| format!("{}({id} -> {new_id})", s.to_str(self.get_symbol_table()))).join(", "));
319 println!(" used T symbols: {}", used.iter().filter_map(|s|
320 if let Symbol::T(_) = s { Some(s.to_str(self.get_symbol_table())) } else { None }).join(" "));
321 println!(" nt_conversion: {:?}", self.nt_conversion);
322 println!(" nt_content: {}", nt_content.iter().enumerate()
323 .filter_map(|(v, f)| f.as_ref().map(|from| format!("{v}:{from}")))
324 .join(", ")
325 );
326 }
327 self.nt_conversion.clear();
328 let new_num_nt = keep.iter().filter(|s| matches!(s, Symbol::NT(_))).count() as VarId;
329 let mut new_v = new_num_nt;
330 let mut conv = HashMap::<VarId, VarId>::new();
331 for i in (0..self.num_nt).rev() {
332 let v = i as VarId;
333 let symbol = Symbol::NT(v);
334 if !keep.contains(&symbol) {
335 if VERBOSE { println!("- deleting {}", symbol.to_str(self.get_symbol_table())); }
336 if self.parent[i].is_none() {
337 self.nt_conversion.insert(v, Removed);
338 }
339 nt_content.remove(i);
340 self.prules.as_mut().unwrap().remove(i);
341 self.start = self.start.map(|s| if s >= v { s - 1 } else { s });
342 if let Some(t) = self.symbol_table.as_mut() { t.remove_nonterminal(v) };
343 self.flags.remove(i);
344 self.parent.remove(i);
345 if self.origin.trees.len() > i {
346 self.origin.trees.remove(i);
347 }
348
349 } else {
350 new_v -= 1;
351 conv.insert(v, new_v);
352 if VERBOSE { println!("- {symbol:?} -> {:?}", Symbol::NT(new_v)); }
353 }
354 }
355 for p in self.prules.as_mut().unwrap() {
356 for f in p {
357 for s in &mut f.v {
358 if let Symbol::NT(s_var) = s {
359 if let Some(new) = conv.get(s_var) {
360 *s = Symbol::NT(*new);
361 }
362 }
363 }
364 if let Some((ref mut var, _id)) = f.origin {
365 if let Some(new_var) = conv.get(var) {
366 *var = *new_var;
367 }
368 }
369 }
370 }
371 for p in &mut self.parent {
372 if let Some(parent) = p {
373 if let Some(new) = conv.get(parent) {
374 *p = Some(*new);
375 }
376 }
377 }
378 for t in &mut self.origin.trees {
379 for mut node in t.iter_post_depth_simple_mut() {
380 if let GrNode::Symbol(Symbol::NT(ref mut v)) = *node {
381 if let Some(new_v) = conv.get(v) {
382 *v = *new_v;
383 }
384 }
385 }
386 }
387 let new_map = self.origin.map.iter()
388 .map(|(v1, (v2, id))| (*conv.get(v1).unwrap_or(v1), (*conv.get(v2).unwrap_or(v2), *id)))
389 .collect::<HashMap<_, _>>();
390 self.origin.map = new_map;
394 keep.retain(|s| !matches!(s, Symbol::NT(_)));
395 keep.extend((0..new_num_nt as VarId).map(Symbol::NT));
396 self.num_nt = new_num_nt as usize;
397 if VERBOSE {
398 println!("-> nt_content: {}", nt_content.iter().enumerate()
399 .filter_map(|(v, f)| f.map(|from| format!("{v}:{from}")))
400 .join(", ")
401 );
402 }
403 for (to, f) in nt_content.into_iter().index() {
404 if let Some(from) = f {
405 if from != to {
406 self.nt_conversion.insert(from, MovedTo(to as VarId));
407 } else if self.nt_conversion.get(&from) == Some(&Removed) {
408 self.nt_conversion.remove(&from);
409 }
410 }
411 };
412 if VERBOSE { println!("-> nt_conversion: {:?}", self.nt_conversion); }
413 }
414
415 pub fn calc_first(&mut self) -> HashMap<Symbol, HashSet<Symbol>> {
416 const VERBOSE: bool = false;
417 if self.start.is_none() {
418 self.log.add_error("calc_first: start NT symbol not defined");
419 }
420 if self.num_nt == 0 {
421 self.log.add_error("calc_first: no nonterminal in grammar".to_string());
422 }
423 if !self.log.has_no_errors() {
424 return HashMap::new();
425 }
426 let mut symbols = HashSet::<Symbol>::new();
427 let mut stack = vec![Symbol::NT(self.start.unwrap())];
428 while let Some(sym) = stack.pop() {
429 if !symbols.contains(&sym) {
430 symbols.insert(sym);
431 if let Symbol::NT(v) = sym {
432 stack.extend(self.prules.as_ref().unwrap()[v as usize].iter().flat_map(|x| &x.v));
433 }
434 }
435 }
436 if symbols.iter().filter(|s| matches!(s, Symbol::NT(_))).count() != self.num_nt {
437 let nt_removed = (0..self.num_nt as VarId)
438 .filter(|v| !symbols.contains(&Symbol::NT(*v)) && !matches!(self.nt_conversion.get(v), Some(MovedTo(_))))
440 .map(Symbol::NT)
441 .to_vec();
442 if !nt_removed.is_empty() {
443 let msg = format!(
444 "calc_first: unused nonterminals: {}",
445 nt_removed.into_iter().map(|s| format!("{:?} = {}", s, s.to_str(self.get_symbol_table()))).join(", "));
446 if !self.options.disable_warning_unused_nt_t {
447 self.log.add_warning(msg);
448 } else {
449 self.log.add_note(msg);
450 }
451 }
452 self.cleanup_symbols(&mut symbols);
453 }
454
455 let unused_t = (0..self.num_t)
456 .filter_map(|t_id| {
457 let s = Symbol::T(t_id as VarId);
458 if !symbols.contains(&s) { Some(format!("T({t_id}) = {}", s.to_str(self.get_symbol_table()))) } else { None }
459 }).to_vec();
460 if !unused_t.is_empty() {
461 let msg = format!("calc_first: unused terminals: {}", unused_t.join(", "));
462 if !self.options.disable_warning_unused_nt_t {
463 self.log.add_warning(msg)
464 } else {
465 self.log.add_note(msg);
466 }
467 }
468
469 let mut first = symbols.into_iter().map(|sym| {
470 match &sym {
471 Symbol::T(_) | Symbol::Empty => (sym, hashset![sym]),
472 Symbol::NT(_) => (sym, HashSet::new()),
473 Symbol::End => panic!("found reserved symbol {sym:?} in production rules"),
474 }
475 }).collect::<HashMap<_, _>>();
476 let mut change = true;
477 let rules = (0..self.num_nt as VarId).filter(|var| first.contains_key(&Symbol::NT(*var))).to_vec();
478 if VERBOSE { println!("rules: {}", rules.iter().map(|v| Symbol::NT(*v).to_str(self.symbol_table.as_ref())).join(", ")); }
479 while change {
480 change = false;
481 for i in &rules {
482 let prule = &self.prules.as_ref().unwrap()[*i as usize];
483 let symbol = Symbol::NT(*i as VarId);
484 if VERBOSE { println!("- {} -> {}", symbol.to_str(self.symbol_table.as_ref()), prule_to_str(prule, self.symbol_table.as_ref())); }
485 let num_items = first[&symbol].len();
486 for alt in prule {
487 if VERBOSE { println!(" - {}", alt.to_str(self.symbol_table.as_ref())); }
488 assert!(!alt.is_empty(), "empty alternative for {}: {}",
489 symbol.to_str(self.symbol_table.as_ref()), alt.to_str(self.symbol_table.as_ref()));
490 if VERBOSE {
491 print!(" [0] {}", alt[0].to_str(self.symbol_table.as_ref()));
492 println!(", first = {}", first[&alt[0]].iter().map(|s| s.to_str(self.symbol_table.as_ref())).join(", "));
493 }
494 let new = alt.calc_alt_first(&first);
495 let _n = first.get(&symbol).unwrap().len();
496 first.get_mut(&symbol).unwrap().extend(new);
497 if VERBOSE && first.get(&symbol).unwrap().len() > _n {
498 println!(" first[{}] -> {}", symbol.to_str(self.get_symbol_table()),
499 first.get(&symbol).unwrap().iter().map(|s| s.to_str(self.get_symbol_table())).join(", "));
500 }
501 }
502 change |= first[&symbol].len() > num_items;
503 }
504 if VERBOSE && change { println!("---------------------------- again"); }
505 }
506 if self.num_t == 0 {
507 self.log.add_error("calc_first: no terminal in grammar".to_string());
508 }
509 first
510 }
511
512 pub fn calc_follow(&self, first: &HashMap<Symbol, HashSet<Symbol>>) -> HashMap<Symbol, HashSet<Symbol>> {
513 const VERBOSE: bool = false;
514 assert!(self.start.is_some(), "start NT symbol not defined");
515 if !self.log.has_no_errors() {
516 return HashMap::new();
517 }
518 let mut follow = first.iter()
519 .filter_map(|(s, _)| if matches!(s, Symbol::NT(_)) { Some((*s, HashSet::<Symbol>::new())) } else { None })
520 .collect::<HashMap<_, _>>();
521 follow.get_mut(&Symbol::NT(self.start.unwrap())).unwrap().insert(Symbol::End);
522 let rules = (0..self.num_nt as VarId).filter(|var| follow.contains_key(&Symbol::NT(*var))).to_vec();
523 let mut change = true;
524 while change {
525 change = false;
526 for i in &rules {
527 let prule = &self.prules.as_ref().unwrap()[*i as usize];
528 let symbol = Symbol::NT(*i as VarId);
529 if VERBOSE { println!("- {} -> {}", symbol.to_str(self.symbol_table.as_ref()), prule_to_str(prule, self.symbol_table.as_ref())); }
530 for alt in prule {
531 if VERBOSE { println!(" - {}", alt.to_str(self.symbol_table.as_ref())); }
532 let mut trail = follow.get(&symbol).unwrap().clone();
533 for sym_i in alt.iter().rev() {
534 if let Symbol::NT(_) = sym_i {
535 let num_items = follow.get(sym_i).unwrap().len();
536 follow.get_mut(sym_i).unwrap().extend(&trail);
537 if VERBOSE && follow.get(sym_i).unwrap().len() > num_items {
538 println!(" follow[{}] -> {}", sym_i.to_str(self.get_symbol_table()),
539 follow.get(sym_i).unwrap().iter().map(|s| s.to_str(self.get_symbol_table())).join(", "));
540 }
541 change |= follow.get(sym_i).unwrap().len() > num_items;
542 if first[sym_i].contains(&Symbol::Empty) {
543 trail.extend(first[sym_i].iter().filter(|s| *s != &Symbol::Empty));
544 } else {
545 trail.clear();
546 trail.extend(&first[sym_i]);
547 }
548 } else {
549 trail.clear();
550 trail.insert(*sym_i);
551 }
552 }
553 }
554 }
555 if VERBOSE && change { println!("---------------------------- again"); }
556 }
557 follow
558 }
559
560 pub(super) fn remove_recursion(&mut self) {
574 const MAX_DISTRIB_LEN: Option<usize> = None; const DONT_DISTRIB_IN_AMBIG: bool = true; const VERBOSE: bool = false;
580
581 self.log.add_note("removing left / binary recursion in grammar...");
582 self.check_num_nt_coherency();
583 if VERBOSE {
584 println!("ORIGINAL:");
585 self.print_rules(false, false);
586 }
587 let next_avail_var = self.get_next_available_var() as usize;
588 let mut var_new = next_avail_var;
589 let mut prules = self.prules.take().unwrap();
591 let mut ambig_alt_id = 0;
592 for var in 0..next_avail_var {
593 let prule = prules.get_mut(var).unwrap();
594 let var = var as VarId;
595 let symbol = Symbol::NT(var);
596 let var_name = symbol.to_str(self.get_symbol_table());
597 let mut extra_prods = Vec::<ProdRule>::new();
598 if prule.iter().any(|p| !p.is_empty() && (p.first().unwrap() == &symbol)) {
599 let orig_str = format!("{var_name} -> {}", prule_to_str(prule, self.get_symbol_table()));
600 self.log.add_note(format!("- modifying: {orig_str}"));
601 if VERBOSE { println!("processing: {orig_str}"); }
602
603 let (mut indep, mut alts) : (Vec<_>, Vec<_>) = take(prule).into_iter()
604 .partition(|alt| alt.first().unwrap() != &symbol && alt.last().unwrap() != &symbol);
605 for alt in indep.iter_mut() {
606 alt.retain(|s| !s.is_empty()); }
608 alts.reverse();
609 let mut prec_eq = alts.iter_mut()
610 .map(|f| {
611 let p = f.flags & ruleflag::PREC_EQ != 0;
612 f.flags &= !ruleflag::PREC_EQ;
613 p
614 }).to_vec();
615 prec_eq.pop();
616 prec_eq.insert(0, false);
617 if indep.is_empty() {
618 self.log.add_error(format!("recursive rules must have at least one independent alternative: {} -> {}",
619 Symbol::NT(var).to_str(self.get_symbol_table()),
620 prule_to_str(prule, self.get_symbol_table())));
621 prule.extend(alts);
622 continue;
624 }
625
626 let mut var_i = 0; let mut rule_var_i = 0; let mut var_alts: Vec<Vec<AltId>> = vec![vec![]]; let mut indep_alts = Vec::<AltId>::new();
633 let mut pr_info = Vec::<AltInfo>::new(); let mut has_ambig = false;
635
636 for (i, alt) in alts.iter().index::<AltId>() {
637 let ty = AltType::from(&symbol, alt);
638 has_ambig |= ty == AltType::LeftAssoc || ty == AltType::RightAssoc;
639 var_i = match ty {
640 AltType::Independant => panic!("there can't be an independent alternative in `alts`"),
641 AltType::LeftAssoc => if prec_eq[i as usize] { var_i } else { var_i + 1 },
642 AltType::Prefix
643 | AltType::RightAssoc => if var_i > rule_var_i || prec_eq[i as usize] || var_alts[var_i].is_empty() { var_i } else { var_i + 1 },
644 AltType::Suffix => var_i,
645 };
646 let alt_info = AltInfo {
647 pred_priority: if ty == AltType::Prefix { None } else { Some(i) },
648 ivar: var_i,
649 ty
650 };
651 pr_info.push(alt_info);
652 let top_maybe = match ty {
653 AltType::Independant => panic!("there can't be an independent alternative in `alts`"),
654 AltType::LeftAssoc => Some(var_i - 1), AltType::Prefix => None, AltType::RightAssoc
657 | AltType::Suffix => Some(var_i), };
659 if let Some(top) = top_maybe {
660 rule_var_i = top;
661 var_alts.resize(1 + top, vec![]);
662 (0..=top).for_each(|v| var_alts[v].push(i));
663 } else {
664 indep_alts.push(i);
665 }
666 let var_alt_str = format!(
667 "[{i:2}] {:15}: {:10} => var_i = {var_i:2}, rule_var_i = {rule_var_i:2}, {{{}}}",
668 alt.to_str(self.get_symbol_table()),
669 format!("{ty:?}"), var_alts.iter().enumerate().map(|(i, va)| format!("{i}: {}", va.iter().join(","))).join(" "));
671 self.log.add_note(format!(" - {var_alt_str}"));
672 if VERBOSE { println!("- {var_alt_str}"); }
673 };
674 assert!(var_i <= rule_var_i + 1, "var_i = {var_i}, rule_var_i = {rule_var_i}");
675
676 let need_indep = indep.len() + indep_alts.len() > 1
680 && (MAX_DISTRIB_LEN.map(|max| indep.len() + indep_alts.len() > max).unwrap_or(false) || has_ambig || rule_var_i < var_i)
681 || DONT_DISTRIB_IN_AMBIG && has_ambig;
682 let num_indep = if need_indep { 1 } else { 0 };
683 let mut var_i_nt = Vec::<(VarId, VarId)>::with_capacity(var_i + 1);
684 var_i_nt.push((var, var_new as VarId));
685 var_i_nt.extend((0..var_i).map(|i| ((var_new + i*2 + 1) as VarId, (var_new + i*2 + 2) as VarId)));
686 var_new += rule_var_i * 2 + 1 + num_indep;
687 if VERBOSE { println!("adding {} variables (w/o independent alternatives), need_indep = {need_indep}, var_new: {} -> {var_new}",
688 rule_var_i * 2 + 1 + num_indep, var_new - (rule_var_i * 2 + 1 + num_indep)); }
689 let nt_indep_maybe = if need_indep { Some(var_new as VarId - 1) } else { None };
690 if var_new > VarId::MAX as usize {
691 self.log.add_error(format!("too many nonterminals when expanding {var_name}: {var_new} > {}", VarId::MAX));
692 return;
693 }
694
695 let new_alts = alts.iter().zip(&pr_info).map(|(a, AltInfo { ivar, ty, .. })| {
698 let mut new_a: Alternative;
699 match ty {
700 AltType::LeftAssoc | AltType::RightAssoc => {
701 new_a = Alternative::new(a.v[1..a.len() - 1].to_owned());
702 if need_indep || *ivar <= rule_var_i {
703 new_a.v.push(Symbol::NT(var_i_nt[*ivar].0));
704 } else {
705 new_a.v.extend(&indep[0].v);
706 }
707 if ty == &AltType::RightAssoc {
708 new_a.flags = ruleflag::R_ASSOC;
709 }
710 }
711 AltType::Suffix => {
712 new_a = Alternative::new(a.v[1..a.len()].to_owned());
713 }
714 AltType::Prefix => {
715 new_a = Alternative::new(a.v[..a.len() - 1].to_owned());
716 new_a.v.push(Symbol::NT(var_i_nt[*ivar].0));
717 }
718 AltType::Independant => panic!("there can't be an independent alternative in `alts`"),
719 }
720 new_a.flags |= a.flags & ruleflag::ALTERNATIVE_INFO;
721 new_a.origin = a.origin;
722 new_a.ambig_alt_id = Some(ambig_alt_id);
723 ambig_alt_id += 1;
724 new_a
725 }).to_vec();
726 let mut used_sym = HashSet::<Symbol>::new();
727 let mut prod_indep = new_alts.iter()
728 .zip(&pr_info)
729 .filter_map(|(nf, AltInfo { ty, .. })| if ty == &AltType::Prefix { Some(nf.clone()) } else { None })
730 .to_vec();
731 let greedy_prologue = pr_info[0].ty == AltType::Prefix && need_indep || pr_info[0].ty == AltType::RightAssoc;
734 for (i, va) in var_alts.into_iter().enumerate() {
735 let (nt, nt_loop) = var_i_nt[i];
736 let prod_nt = if let Some(nt_indep) = nt_indep_maybe {
737 prule!(nt nt_indep, nt nt_loop)
738 } else {
739 prod_indep.iter().cloned()
741 .chain(indep.iter().map(|a| {
742 let mut new_a = a.clone();
743 new_a.push(sym!(nt nt_loop));
744 new_a
745 })).collect()
746 };
747 let mut new_used_sym = Vec::<Symbol>::new();
748 let mut prod_nt_loop = va.iter().enumerate().rev().map(|(_, &a_id)| {
749 let mut f = new_alts[a_id as usize].clone();
750 f.v.push(Symbol::NT(nt_loop));
751 let sym = f.first().unwrap();
752 let is_used_sym = used_sym.contains(sym);
753 if !is_used_sym {
754 new_used_sym.push(*sym);
755 }
756 if is_used_sym || (i == 0 && greedy_prologue) {
757 f.flags |= ruleflag::GREEDY;
758 }
759 if !has_ambig && pr_info[a_id as usize].ty == AltType::Suffix {
760 self.set_flags(nt, ruleflag::PARENT_L_RECURSION);
761 self.set_flags(nt_loop, ruleflag::CHILD_L_RECURSION);
762 }
763 f
764 }).to_vec();
765 used_sym.extend(new_used_sym);
766 prod_nt_loop.push(alt!(e));
767 if i == 0 {
768 *prule = prod_nt;
769 extra_prods.push(prod_nt_loop);
770 if let Some(t) = self.symbol_table.as_mut() {
771 assert_eq!(t.add_child_nonterminal(var), var_i_nt[0].1);
772 };
773 } else {
774 extra_prods.extend([prod_nt, prod_nt_loop]);
775 if let Some(t) = self.symbol_table.as_mut() {
776 assert_eq!(t.add_child_nonterminal(var), var_i_nt[i].0);
777 assert_eq!(t.add_child_nonterminal(var), var_i_nt[i].1);
778 };
779 }
780 if has_ambig {
781 self.set_flags(nt, ruleflag::PARENT_L_RECURSION);
782 self.set_flags(nt_loop, ruleflag::CHILD_L_RECURSION);
783 }
784 }
785 if need_indep {
786 if let Some(t) = self.symbol_table.as_mut() {
787 assert_eq!(t.add_child_nonterminal(var), nt_indep_maybe.unwrap());
788 }
789 }
790 if VERBOSE {
791 println!("new alternatives: {}", new_alts.iter().enumerate()
792 .filter_map(|(i, na)| if na.is_empty() { None } else { Some(format!("[{i}] {}", na.to_str(self.get_symbol_table()))) }).join(", "));
793 println!("vars: {}{}", var_i_nt.iter().take(rule_var_i + 1).enumerate().map(|(i, (v1, v2))|
794 format!("[{i}]:{},{}", Symbol::NT(*v1).to_str(self.get_symbol_table()),
795 Symbol::NT(*v2).to_str(self.get_symbol_table()))).join(" "),
796 if need_indep { format!(" [{}]:{}", var_i, Symbol::NT(var_i_nt[var_i].0).to_str(self.get_symbol_table() )) } else { String::new() });
797 }
798 if has_ambig {
799 self.set_flags(var, ruleflag::PARENT_AMBIGUITY);
800 } else if !prod_indep.is_empty() && !need_indep {
801 self.set_flags(var, ruleflag::R_RECURSION);
802 }
803 for (nt, nt_prime) in var_i_nt.into_iter().take(rule_var_i + 1) {
804 if nt != var {
805 self.set_parent(nt, var);
806 }
807 self.set_parent(nt_prime, nt);
808 }
809 if let Some(nt_indep) = nt_indep_maybe {
810 if !has_ambig && prod_indep.iter().any(|p| p.last() == Some(&Symbol::NT(nt_indep)))
811 || has_ambig && !prod_indep.is_empty()
812 {
813 self.set_flags(nt_indep, ruleflag::R_RECURSION);
814 }
815 for alt in &mut indep {
816 if alt.v.is_empty() {
817 alt.v.push(Symbol::Empty);
818 }
819 }
820 prod_indep.extend(indep);
821 self.set_parent(nt_indep, var);
822 extra_prods.push(prod_indep);
823 }
824 self.parent.resize(var_new, None);
825 self.flags.resize(var_new, 0);
826 self.log.add_note(format!(" => {var_name} -> {}", prule_to_str(prule, self.get_symbol_table())));
827 for (v, p) in extra_prods.iter().index_start(prules.len() as VarId) {
828 self.log.add_note(
829 format!(" {} -> {}", Symbol::NT(v).to_str(self.get_symbol_table()), prule_to_str(p, self.get_symbol_table())));
830 }
831 } else if prule.iter().any(|p| !p.is_empty() && p.last().unwrap() == &symbol) && self.get_flags(var) & ruleflag::CHILD_REPEAT == 0 {
832 self.set_flags(var, ruleflag::R_RECURSION);
833 }
834 prules.extend(extra_prods);
835 let incompatibility_flags = ruleflag::R_RECURSION | ruleflag::L_FORM | ruleflag::PARENT_L_RECURSION;
836 let incompatibility_str = "<L> right recursivity + left recursivity";
837 let flags = self.get_flags(var);
838 if flags & incompatibility_flags == incompatibility_flags {
839 let tree = &self.origin.trees[var as usize];
840 self.log.add_error(format!(
841 "`{var_name} -> {}` has incompatible rule alternatives: {incompatibility_str}",
842 grtree_to_str(tree, None, None, None, self.get_symbol_table(), false)));
843 }
844 }
845 if VERBOSE {
846 println!("#prules: {}, #flags: {}, #parents:{}", prules.len(), self.flags.len(), self.parent.len());
847 if let Some(ref mut table) = self.symbol_table {
848 println!("table: {} ({})", table.get_nonterminals().join(", "), table.get_num_nt());
849 }
850 }
851 self.num_nt = prules.len();
852 self.prules = Some(prules);
853 if VERBOSE {
854 println!("FINAL:");
855 self.print_rules(false, false);
856 println!("FLAGS:\n{}", self.flags.iter().index::<VarId>()
857 .map(|(v, f)| format!("- ({v:2}) {}: {}", Symbol::NT(v).to_str(self.get_symbol_table()), ruleflag::to_string(*f).join(", "))).join("\n"));
858 if let Some(table) = &self.symbol_table {
859 println!("Symbol table:\n-NT: {}\n-T: {}",
860 table.get_nonterminals().enumerate().map(|(i, nt)| format!("{i}:{nt}")).join(", "),
861 table.get_terminals().enumerate()
862 .map(|(i, (t, txt))| format!("{i}:{t}{}", if let Some(st) = txt { format!(" ({st})") } else { String::new() })).join(", "))
863 }
864 }
865 }
866
867 pub fn left_factorize(&mut self) {
881 fn similarity(a: &Alternative, b: &Alternative) -> usize {
882 a.iter().zip(b.iter()).take_while(|(a, b)| a == b).count()
883 }
884
885 const VERBOSE: bool = false;
886 self.log.add_note("removing left factorization...");
887 let mut new_var = self.get_next_available_var();
888 let mut prules = self.prules.take().unwrap();
890 let mut i = 0;
891 while i < prules.len() {
892 let prule = &mut prules[i];
893 let var_flags = self.flags[i];
894 let var = i as VarId;
895 i += 1;
896 if prule.len() < 2 {
897 continue
898 }
899 let mut alts = prule.clone();
900 let mut extra = Vec::<ProdRule>::new();
901 let mut changed = false;
902 alts.sort();
903 if VERBOSE { println!("{var}: {} -> {}", Symbol::NT(var).to_str(self.get_symbol_table()), alts.iter().map(|f| f.to_str(self.get_symbol_table())).join(" | ")); }
904 while alts.len() > 1 {
905 let simi = alts.windows(2).enumerate()
906 .map(|(j, x)| (j, similarity(&x[0], &x[1])))
907 .skip_while(|(_, s)| *s == 0)
908 .take_while(|(_, s)| *s != 0)
909 .to_vec();
910 if simi.is_empty() {
911 if VERBOSE { println!(" nothing to factorize"); }
912 break;
913 }
914 changed = true;
915 let min = simi.iter().map(|(_, s)| *s).min().unwrap();
916 let start = simi[0].0;
917 let stop = start + simi.len();
918 let mut factorized = Alternative::new(alts[start].v.iter().take(min).cloned().to_vec());
919 let mut child = alts.drain(start..=stop).to_vec();
920 if VERBOSE { println!("child:\n{}", child.iter().enumerate().map(|(i, c)| format!("- [{i}] = {} org = {:?}", c.to_str(self.get_symbol_table()), c.origin)).join("\n")); }
921 if child.iter().all(|f| f.is_greedy()) {
922 factorized.flags |= ruleflag::GREEDY;
923 }
924 for f in &mut child {
925 f.v.drain(0..min);
926 }
927 if child[0].v.is_empty() {
928 if var_flags & ruleflag::CHILD_REPEAT != 0 {
929 factorized.origin = child[0].origin;
930 }
931 child[0].v.push(Symbol::Empty);
932 let empty = child.remove(0);
933 child.push(empty);
934 }
935 let var_prime = new_var;
936 new_var += 1;
937 self.set_flags(var, ruleflag::PARENT_L_FACTOR);
938 self.set_flags(var_prime, ruleflag::CHILD_L_FACT);
939 let rep_l_form = ruleflag::CHILD_REPEAT | ruleflag::L_FORM;
940 let top = if var_flags & rep_l_form == rep_l_form { var } else { self.get_top_parent(var) };
941 if let Some(table) = self.symbol_table.as_mut() {
942 assert_eq!(table.add_child_nonterminal(top), var_prime);
943 };
944 self.set_parent(var_prime, var);
945 let symbol_prime = Symbol::NT(var_prime);
946 factorized.v.push(symbol_prime);
947 alts.insert(start, factorized);
948 if VERBOSE {
949 println!(" - similarity: {} => {}", simi.iter().map(|(j, s)| format!("{j}:{s}")).join(", "), min);
950 println!(" factorize: {}", child.iter().map(|a| a.to_str(self.get_symbol_table())).join(" | "));
951 println!(" left: {}", alts.iter().map(|a| a.to_str(self.get_symbol_table())).join(" | "));
952 }
953 extra.push(child);
954 }
955 if changed {
956 self.log.add_note(format!(
957 "- modifying: {} -> {}",
958 Symbol::NT(var).to_str(self.get_symbol_table()), prule_to_str(prule, self.get_symbol_table())));
959 self.log.add_note(format!(
960 " => {} -> {}",
961 Symbol::NT(var).to_str(self.get_symbol_table()), prule_to_str(&alts, self.get_symbol_table())));
962 *prule = alts;
963 let offset = prules.len() as VarId;
964 for (v, p) in extra.iter().index_start(offset) {
965 self.log.add_note(format!(
966 " {} -> {}",
967 Symbol::NT(v).to_str(self.get_symbol_table()), prule_to_str(p, self.get_symbol_table())));
968 }
969 prules.extend(extra);
970 }
971 }
972 self.num_nt = prules.len();
973 self.prules = Some(prules);
974 }
975
976 pub(crate) fn remove_ambiguity(&self) {
977 todo!()
978 }
979
980 fn transfer_alt_flags(&mut self) {
982 const FLAG_MASK: u32 = ruleflag::L_FORM;
984
985 for (v, prule) in self.prules.as_mut().unwrap().iter_mut().enumerate() {
986 let flags = prule.iter().fold(0, |acc, alt| acc | alt.flags) & FLAG_MASK;
987 self.flags[v] |= flags;
988 for alt in prule.iter_mut() {
989 alt.flags &= !FLAG_MASK;
990 }
991 }
992 }
993
994 fn check_flags(&mut self) {
995 const FLAG_CHECK_MASK: u32 = ruleflag::L_FORM | ruleflag::CHILD_REPEAT | ruleflag::R_RECURSION;
996 for v in 0..self.num_nt {
997 if self.flags[v] & FLAG_CHECK_MASK == ruleflag::L_FORM {
998 if self.flags[v] & ruleflag::CHILD_L_FACT == 0 || self.flags[self.parent[v].unwrap() as usize] & ruleflag::R_RECURSION == 0 {
1000 self.log.add_error(format!("{} has an illegal flag L-Form (only used with +, *, or right recursion): {}",
1001 Symbol::NT(v as VarId).to_str(self.get_symbol_table()),
1002 ruleflag::to_string(self.flags[v]).join(" ")
1003 ));
1004 }
1005 }
1006 }
1007 }
1008
1009 pub fn prs_alt_origins_str(&self) -> Vec<String> {
1014 let mut cols = vec![vec!["| ProdRuleSet".to_string(), "|".to_string(), "Original rules".to_string()]];
1015 cols.extend(self.get_prules_iter()
1017 .flat_map(|(v, prule)| prule.iter()
1018 .map(move |alt| vec![
1019 format!("| {}", alt_to_rule_str(v, &alt.v, self.get_symbol_table())),
1020 "|".to_string(),
1021 if let Some((vo, ido)) = alt.origin {
1022 let tree = &self.origin.trees[vo as usize];
1023 let emphasis = if tree.get_root() == Some(ido) { None } else { Some(ido) };
1024 let orig_rule = grtree_to_str_custom(
1025 tree,
1026 None,
1027 emphasis,
1028 Some(vo),
1029 self.get_symbol_table(),
1030 false,
1031 self.options.ansi);
1032 format!("{} -> {orig_rule}", Symbol::NT(vo).to_str(self.get_symbol_table()))
1033 } else {
1034 String::new()
1035 }
1036 ])
1037 ));
1038 let n1 = cols.len();
1039 cols.extend((0..self.num_nt)
1041 .filter_map(|var|
1042 self.parent[var]
1043 .and_then(|_| self.origin.map
1044 .get(&(var as VarId))
1045 .map(|&(v, index)| (var as VarId, v, index))))
1046 .map(|(var, v, index)| vec![
1047 format!("| {}", Symbol::NT(var).to_str(self.get_symbol_table())),
1048 "|".to_string(),
1049 format!("{} -> {}",
1050 Symbol::NT(v).to_str(self.get_symbol_table()),
1051 grtree_to_str_custom(
1052 &self.origin.trees[v as usize],
1053 None,
1054 Some(index),
1055 None,
1056 self.get_symbol_table(),
1057 false,
1058 self.options.ansi))
1059
1060 ]));
1061 let mut lines = columns_to_str(cols, None);
1062 let max = lines.iter().map(|s| s.replace(STR_BEFORE_ANSI, "").replace(STR_AFTER_ANSI, "").charlen()).max().unwrap();
1063 let sep = format!("{:-<1$}", "", max);
1064 lines.push(sep.clone());
1065 lines.insert(n1, sep.clone());
1066 lines.insert(1, sep);
1067 lines
1068 }
1069}
1070
1071impl<T> LogReader for ProdRuleSet<T> {
1072 type Item = BufLog;
1073
1074 fn get_log(&self) -> &Self::Item {
1075 &self.log
1076 }
1077
1078 fn give_log(self) -> Self::Item {
1079 self.log
1080 }
1081}
1082
1083impl<T> HasBuildErrorSource for ProdRuleSet<T> {
1084 const SOURCE: BuildErrorSource = BuildErrorSource::ProdRuleSet;
1085}
1086
1087impl ProdRuleSet<General> {
1088 fn with_capacity(capacity: usize) -> Self {
1089 Self {
1090 prules: Some(Vec::with_capacity(capacity)),
1091 origin: Origin::new(),
1092 num_nt: 0,
1093 num_t: 0,
1094 symbol_table: None,
1095 flags: Vec::with_capacity(capacity),
1096 parent: Vec::with_capacity(capacity),
1097 start: None,
1098 name: None,
1099 nt_conversion: HashMap::new(),
1100 log: BufLog::new(),
1101 options: ProdRuleSetOptions::default(),
1102 _phantom: PhantomData
1103 }
1104 }
1105}
1106
1107impl ProdRuleSet<LL1> {
1108 pub(super) fn calc_table(&mut self, first: &HashMap<Symbol, HashSet<Symbol>>, follow: &HashMap<Symbol, HashSet<Symbol>>, error_recovery: bool)
1117 -> LLParsingTable
1118 {
1119 fn add_table(table: &mut [Vec<AltId>], num_t: usize, nt_id: VarId, t_id: VarId, a_id: AltId) {
1120 let pos = nt_id as usize * num_t + t_id as usize;
1121 table[pos].push(a_id);
1122 }
1123 const VERBOSE: bool = false;
1124 const DISABLE_FILTER: bool = false;
1125 if !self.log.has_no_errors() {
1126 return LLParsingTable::new();
1127 }
1128 if VERBOSE {
1129 fn p_hash_hash(title: &str, tbl: Option<&SymbolTable>, table: &HashMap<Symbol, HashSet<Symbol>>) {
1130 let mut syms = table.iter().map(|(s, hs)| {
1131 let mut f = hs.iter().cloned().to_vec();
1132 f.sort();
1133 (*s, f)
1134 }).to_vec();
1135 syms.sort();
1136 println!("{title}\n{}",
1137 syms.into_iter().map(|(s, f)| format!("- {} -> {}", s.to_str(tbl), f.into_iter().map(|s2| s2.to_str(tbl)).join(", "))).join("\n"));
1138 }
1139 p_hash_hash("first:", self.get_symbol_table(), first);
1140 p_hash_hash("follow:", self.get_symbol_table(), follow);
1141 }
1142 let mut alts = self.prules.as_ref().unwrap().iter().index().filter(|(v, _)| DISABLE_FILTER || first.contains_key(&Symbol::NT(*v)))
1143 .flat_map(|(v, x)| x.iter().map(move |a| (v, a.clone()))).to_vec();
1144 let error_skip = alts.len() as AltId; let error_pop = error_skip + 1; let num_nt = self.num_nt;
1147 let num_t = self.num_t + 1;
1148 let end = (num_t - 1) as VarId; let mut used_t = HashSet::<Symbol>::new();
1150 let mut table: Vec<Vec<AltId>> = vec![vec![]; num_nt * num_t];
1151 for (a_id, (nt_id, alt)) in alts.iter().index() {
1152 used_t.extend(alt.iter().filter(|s| s.is_t()));
1153 if VERBOSE { println!("- {a_id}: {} -> {} => {}", Symbol::NT(*nt_id).to_str(self.get_symbol_table()),
1154 alt.to_str(self.get_symbol_table()),
1155 alt.calc_alt_first(first).iter().map(|s| s.to_str(self.get_symbol_table())).join(" ")); }
1156 let mut has_end = false;
1157 let mut has_empty = false;
1158 for s in alt.calc_alt_first(first) {
1159 match s {
1160 Symbol::Empty => {
1161 has_empty = true;
1162 for s in &follow[&Symbol::NT(*nt_id)] {
1163 match s {
1164 Symbol::T(t_id) => add_table(&mut table, num_t, *nt_id, *t_id, a_id),
1165 Symbol::End => add_table(&mut table, num_t, *nt_id, end, a_id),
1166 _ => {}
1167 }
1168 }
1169 }
1170 Symbol::T(t_id) => {
1171 add_table(&mut table, num_t, *nt_id, t_id, a_id);
1172 }
1173 Symbol::NT(_) => {}
1174 Symbol::End => {
1175 has_end = true;
1176 }
1177 }
1178 }
1179 if has_empty && has_end {
1180 add_table(&mut table, num_t, *nt_id, end, end);
1181 }
1182 }
1183 let mut final_table = Vec::<AltId>::new();
1185 for nt_id in 0..num_nt {
1186 for t_id in 0..num_t {
1187 let pos = nt_id * num_t + t_id;
1188 final_table.push(match table[pos].len() {
1189 0 => {
1190 if error_recovery {
1191 let sym_t = if t_id < num_t - 1 { Symbol::T(t_id as TokenId) } else { Symbol::End };
1192 let sym_nt = Symbol::NT(nt_id as VarId);
1193 if follow[&sym_nt].contains(&sym_t) || first[&sym_nt].contains(&sym_t) {
1194 error_pop
1195 } else {
1196 error_skip
1197 }
1198 } else {
1199 error_skip
1200 }
1201 },
1202 1 => *table[pos].first().unwrap(),
1203 _ => {
1204 let greedies = table[pos].iter().filter(|&a_id| alts[*a_id as usize].1.is_greedy()).cloned().to_vec();
1206 if greedies.len() == 1 {
1207 let chosen = greedies[0];
1208 self.log.add_note(
1209 format!(" - calc_table: expected ambiguity for NT '{}', T '{}': {} => <{}> is specified as greedy and has been chosen",
1210 Symbol::NT(nt_id as VarId).to_str(self.get_symbol_table()),
1211 if t_id < self.num_t { Symbol::T(t_id as VarId).to_str(self.get_symbol_table()) } else { "<EOF>".to_string() },
1212 table[pos].iter().map(|a_id|
1213 format!("<{}>", alts[*a_id as usize].1.to_str(self.get_symbol_table()))).join(" or "),
1214 alts[chosen as usize].1.to_str(self.get_symbol_table())
1215 ));
1216 table[pos] = greedies;
1217 chosen
1218 } else {
1219 let row = (0..num_t).filter(|j| *j != t_id).flat_map(|j| &table[nt_id * num_t + j]).collect::<HashSet<_>>();
1220 let chosen = *table[pos].iter().find(|a| !row.contains(a)).unwrap_or(&table[pos][0]);
1221 self.log.add_warning(
1222 format!("- calc_table: ambiguity for NT '{}', T '{}': {} => <{}> has been chosen",
1223 Symbol::NT(nt_id as VarId).to_str(self.get_symbol_table()),
1224 if t_id < self.num_t { Symbol::T(t_id as VarId).to_str(self.get_symbol_table()) } else { "<EOF>".to_string() },
1225 table[pos].iter().map(|a_id|
1226 format!("<{}>", alts[*a_id as usize].1.to_str(self.get_symbol_table()))).join(" or "),
1227 alts[chosen as usize].1.to_str(self.get_symbol_table())
1228 ));
1229 table[pos] = vec![chosen];
1230 chosen
1231 }
1232 }
1233 });
1234 }
1235 }
1236 if !(0..num_t - 1).any(|t_id| (0..num_nt).any(|nt_id| final_table[nt_id * num_t + t_id] < error_skip)) {
1237 self.log.add_error("- calc_table: no terminal used in the table".to_string());
1238 }
1239 for (_, a) in &mut alts {
1240 a.flags &= !ruleflag::GREEDY;
1241 }
1242 let table = LLParsingTable { num_nt, num_t, alts, table: final_table, flags: self.flags.clone(), parent: self.parent.clone() };
1243 self.log.add_info("parsing table:");
1244 self.log.extend_messages(
1245 table.to_str(self.get_symbol_table()).into_iter()
1246 .map(LogMsg::Info)
1247 );
1248 table
1249 }
1250
1251 pub fn make_parsing_table(&mut self, error_recovery: bool) -> LLParsingTable {
1252 self.log.add_note("- calculating parsing table...");
1253 let first = self.calc_first();
1254 let follow = self.calc_follow(&first);
1255 self.calc_table(&first, &follow, error_recovery)
1256 }
1257
1258 pub fn gen_tables_source_code(&self, indent: usize) -> String {
1259 let st = self.symbol_table.as_ref().unwrap();
1260 let mut source = Vec::<String>::new();
1261 source.push(format!("static ORIGIN: [(Option<usize>, &[(GrNode, &[usize])]); {}] = [", self.origin.trees.len()));
1263 for t in &self.origin.trees {
1264 let tree_str = (0..t.len())
1265 .map(|i| format!("({}, &[{}])", t.get(i).gen_source_code(), t.children(i).iter().join(",")))
1266 .join(", ");
1267 source.push(format!(" ({:?}, &[{}]),", t.get_root(), tree_str));
1268 }
1269 source.push("];".to_string());
1270 source.push(format!("static MAP: [(VarId, (VarId, usize)); {}] = [", self.origin.map.len()));
1271 let mut sorted_map = self.origin.map.iter().to_vec();
1272 sorted_map.sort(); source.extend(sorted_map.chunks(5)
1274 .map(|chk| format!(" {},", chk.iter().map(|(a, (c, d))| format!("({a}, ({c}, {d}))")).join(", "))));
1275 source.push("];".to_string());
1276 source.push("let origin = Origin::from_data(".to_string());
1277 source.push(" ORIGIN.into_iter().map(|(root, nodes)| GrTree::from((root, nodes.to_vec()))).collect(),".to_string());
1278 source.push(" HashMap::from(MAP));".to_string());
1279 source.push(String::new());
1281 source.push("let ll1_tables = ProdRuleSetTables::new(".to_string());
1282 source.push(format!(" {:?},", self.name));
1283 source.push(" vec![".to_string());
1284 source.extend(self.prules.as_ref().unwrap().iter().map(|prule| format!(" {},", prule_to_macro(prule))));
1285 source.push(" ],".to_string());
1286 source.push(" origin,".to_string());
1287 source.push(format!(" vec![{}],", st.get_terminals().map(|x| format!("{x:?}")).join(", ")));
1288 source.push(format!(" vec![{}],", st.get_nonterminals().map(|x| format!("{x:?}")).join(", ")));
1289 source.push(format!(" vec![{}],", self.flags.iter().join(", ")));
1290 source.push(format!(" vec![{}],", self.parent.iter().map(|p_maybe| format!("{p_maybe:?}")).join(", ")));
1291 source.push(format!(" {:?},", self.start));
1292 source.push(format!(" {:?},", self.options));
1293 source.push(format!(" hashmap![{}]", self.nt_conversion.iter().map(|(v, conv)| format!("{v} => {conv:?}")).join(", ")));
1294 source.push(");".to_string());
1295 indent_source(vec![source], indent)
1296 }
1297}
1298
1299pub struct ProdRuleSetTables {
1302 name: Option<String>,
1303 prules: Vec<ProdRule>,
1304 origin: Origin<VarId, FromPRS>,
1305 t: Vec<(String, Option<String>)>, nt: Vec<String>, flags: Vec<u32>,
1308 parent: Vec<Option<VarId>>,
1309 start: Option<VarId>,
1310 options: ProdRuleSetOptions,
1311 nt_conversion: HashMap<VarId, NTConversion>,
1312}
1313
1314impl ProdRuleSetTables {
1315 pub fn new<T: Into<String>>(
1316 name: Option<T>,
1317 prules: Vec<ProdRule>,
1318 origin: Origin<VarId, FromPRS>,
1319 t: Vec<(T, Option<T>)>,
1320 nt: Vec<T>,
1321 flags: Vec<u32>,
1322 parent: Vec<Option<VarId>>,
1323 start: Option<VarId>,
1324 options: ProdRuleSetOptions,
1325 nt_conversion: HashMap<VarId, NTConversion>,
1326 ) -> Self {
1327 let t = t.into_iter().map(|(t, t_maybe)| (t.into(), t_maybe.map(|t| t.into()))).collect();
1328 let nt = nt.into_iter().map(|nt| nt.into()).collect();
1329 ProdRuleSetTables {
1330 name: name.map(|s| s.into()),
1331 prules,
1332 origin, t, nt, flags, parent, start, options,
1333 nt_conversion,
1334 }
1335 }
1336
1337 pub fn get_name(&self) -> Option<&String> {
1338 self.name.as_ref()
1339 }
1340}
1341
1342impl BuildFrom<ProdRuleSetTables> for ProdRuleSet<LL1> {
1343 fn build_from(source: ProdRuleSetTables) -> Self {
1344 let mut symbol_table = SymbolTable::new();
1345 symbol_table.extend_terminals(source.t);
1346 symbol_table.extend_nonterminals(source.nt);
1347 ProdRuleSet {
1348 prules: Some(source.prules),
1349 origin: source.origin,
1350 num_nt: symbol_table.get_num_nt(),
1351 num_t: symbol_table.get_num_t(),
1352 symbol_table: Some(symbol_table),
1353 flags: source.flags,
1354 parent: source.parent,
1355 start: source.start,
1356 name: source.name,
1357 nt_conversion: source.nt_conversion,
1358 log: BufLog::new(),
1359 options: source.options,
1360 _phantom: PhantomData,
1361 }
1362 }
1363}
1364
1365impl BuildFrom<RuleTreeSet<Normalized>> for ProdRuleSet<General> {
1368 fn build_from(mut rules: RuleTreeSet<Normalized>) -> Self {
1373 fn children_to_vec(tree: &GrTree, parent_id: usize) -> Alternative {
1374 let mut flags: u32 = 0;
1375 let alt = tree.children(parent_id).iter()
1376 .map(|id| tree.get(*id))
1377 .filter(|node| {
1378 match **node {
1379 GrNode::RAssoc => {
1380 flags |= ruleflag::R_ASSOC;
1381 false
1382 }
1383 GrNode::LForm(_) => {
1384 flags |= ruleflag::L_FORM;
1385 false
1386 }
1387 GrNode::PrecEq => {
1388 flags |= ruleflag::PREC_EQ;
1389 false
1390 }
1391 GrNode::Greedy => {
1392 flags |= ruleflag::GREEDY;
1393 false
1394 }
1395 _ => true,
1396 }
1397 })
1398 .map(|node| {
1399 match node {
1400 GrNode::Symbol(s) => *s,
1401 x => panic!("unexpected symbol {x} under &")
1402 }
1403 }).to_vec();
1404 Alternative::new(alt).with_flags(flags)
1405 }
1406 let mut prules = Self::with_capacity(rules.trees.len());
1407 prules.start = rules.start;
1408 prules.symbol_table = rules.symbol_table;
1409 prules.flags = rules.flags;
1410 prules.parent = rules.parent;
1411 prules.nt_conversion = rules.nt_conversion;
1412 prules.log = rules.log;
1413 if !prules.log.has_no_errors() {
1414 return prules;
1418 }
1419 prules.origin = Origin::<VarId, FromPRS>::from_trees_mut(&mut rules.origin.trees);
1420 for (var, tree) in rules.trees.iter().index() {
1421 if !tree.is_empty() {
1422 let root = tree.get_root().expect("tree {var} has no root");
1423 let root_sym = tree.get(root);
1424 let mut prule = match root_sym {
1425 GrNode::Symbol(s) => {
1426 let mut alt = Alternative::new(vec![*s]);
1427 if let Some(&(v, ch)) = rules.origin.map.get(&(var, root)) {
1428 alt.origin = Some((v, ch));
1429 }
1430 vec![alt]
1431 },
1432 GrNode::Concat => {
1433 let mut alt = children_to_vec(tree, root);
1434 if let Some(&(v, ch)) = rules.origin.map.get(&(var, root)) {
1435 alt.origin = Some((v, ch));
1436 }
1437 vec![alt]
1438 },
1439 GrNode::Or => tree.children(root).iter()
1440 .map(|id| {
1441 let child = tree.get(*id);
1442 let mut alt = if let GrNode::Symbol(s) = child {
1443 Alternative::new(vec![*s])
1444 } else {
1445 assert_eq!(*child, GrNode::Concat, "unexpected symbol {child} under |");
1446 children_to_vec(tree, *id)
1447 };
1448 if let Some(&(v, ch)) = rules.origin.map.get(&(var, *id)) {
1449 alt.origin = Some((v, ch));
1450 }
1451 alt
1452 }).to_vec(),
1453 s => panic!("unexpected symbol {s} as root of normalized GrTree for NT {}", Symbol::NT(var).to_str(prules.get_symbol_table()))
1454 };
1455 if prule.iter().any(|f| f.flags & ruleflag::L_FORM != 0) {
1456 prules.set_flags(var, ruleflag::L_FORM);
1465 for a in prule.iter_mut() {
1467 a.flags &= !ruleflag::L_FORM;
1468 }
1469 }
1470 if prules.parent[var as usize].is_some() {
1473 if let Some(&(v, index)) = rules.origin.map.get(&(var, root)) {
1474 prules.origin.add(var, (v, index));
1475 }
1476 }
1477 prules.prules.as_mut().unwrap().push(prule);
1478 } else {
1479 prules.prules.as_mut().unwrap().push(ProdRule::new()); }
1481 }
1482 prules.calc_num_symbols();
1483 prules
1484 }
1485}
1486
1487impl BuildFrom<RuleTreeSet<General>> for ProdRuleSet<General> {
1488 fn build_from(rules: RuleTreeSet<General>) -> Self {
1493 let rts = RuleTreeSet::<Normalized>::build_from(rules);
1494 let mut prs = ProdRuleSet::build_from(rts);
1495 if prs.log.has_no_errors() {
1496 prs.simplify();
1497 }
1498 prs
1499 }
1500}
1501
1502impl BuildFrom<ProdRuleSet<General>> for ProdRuleSet<LL1> {
1503 fn build_from(mut rules: ProdRuleSet<General>) -> Self {
1504 if rules.log.has_no_errors() {
1505 rules.remove_recursion();
1506 rules.left_factorize();
1507 rules.transfer_alt_flags();
1508 rules.check_flags();
1509 rules.log.add_note("final rule set:");
1510 rules.log.extend_messages(
1511 rules.prs_alt_origins_str().into_iter().map(LogMsg::Note)
1512 );
1513 }
1514 ProdRuleSet::<LL1> {
1515 prules: rules.prules,
1516 origin: rules.origin,
1517 num_nt: rules.num_nt,
1518 num_t: rules.num_t,
1519 symbol_table: rules.symbol_table,
1520 flags: rules.flags,
1521 parent: rules.parent,
1522 start: rules.start,
1523 name: rules.name,
1524 nt_conversion: rules.nt_conversion,
1525 log: rules.log,
1526 options: rules.options,
1527 _phantom: PhantomData,
1528 }
1529 }
1530}
1531
1532impl BuildFrom<ProdRuleSet<General>> for ProdRuleSet<LR> {
1533 fn build_from(mut rules: ProdRuleSet<General>) -> Self {
1534 if rules.log.has_no_errors() {
1535 rules.remove_ambiguity();
1536 rules.transfer_alt_flags();
1537 rules.check_flags();
1538 }
1539 ProdRuleSet::<LR> {
1540 prules: rules.prules,
1541 origin: rules.origin,
1542 num_nt: rules.num_nt,
1543 num_t: rules.num_t,
1544 symbol_table: rules.symbol_table,
1545 flags: rules.flags,
1546 parent: rules.parent,
1547 start: rules.start,
1548 name: rules.name,
1549 nt_conversion: rules.nt_conversion,
1550 log: rules.log,
1551 options: rules.options,
1552 _phantom: PhantomData,
1553 }
1554 }
1555}
1556
1557impl<T> ProdRuleSet<T> {
1561 pub fn print_rules(&self, as_comment: bool, filter_empty_nt: bool) {
1562 let prefix = if as_comment { " // " } else { " " };
1563 println!("{prefix}{}",
1564 self.get_prules_iter()
1565 .filter(|(_, rule)| !filter_empty_nt || **rule != prule!(e))
1566 .map(|(var, p)|
1567 format!("({var}) {} -> {}",
1568 Symbol::NT(var).to_str(self.get_symbol_table()),
1569 prule_to_str(p, self.get_symbol_table())))
1570 .join(&format!("\n{prefix}")));
1571 }
1572
1573 pub fn print_alts(&self) {
1574 println!("Alternatives:\n{}",
1575 self.get_alts().enumerate().map(|(id, (v, a))|
1576 format!(" // - {id}: {} -> {}{}",
1577 Symbol::NT(v).to_str(self.get_symbol_table()),
1578 a.iter().map(|s| s.to_str(self.get_symbol_table())).join(" "),
1579 if a.flags != 0 { format!(" {} ({})", ruleflag::to_string(a.flags).join(" | "), a.flags) } else { "".to_string() }
1580 )
1581 ).join("\n"));
1582 }
1583}
1584
1585impl LLParsingTable {
1586 pub fn to_str(&self, symbol_table: Option<&SymbolTable>) -> Vec<String> {
1587 let LLParsingTable { num_nt, num_t, alts, table, .. } = self;
1588 let error_skip = alts.len() as AltId;
1589 let error_pop = error_skip + 1;
1590 let str_nt = (0..*num_nt).map(|i| Symbol::NT(i as VarId).to_str(symbol_table)).to_vec();
1591 let max_nt_len = str_nt.iter().map(|s| s.len()).max().unwrap();
1592 let str_t = (0..*num_t).map(|j| if j + 1 < *num_t { Symbol::T(j as VarId).to_str_quote(symbol_table) } else { "$".to_string() }).to_vec();
1593 let t_len = str_t.iter().map(|s| s.len().max(3)).to_vec();
1594 let mut lines = vec![];
1595 lines.push(format!("{:<w$} | {}", "", (0..*num_t).map(|j| format!("{:^w$}", str_t[j], w = t_len[j])).join(" "), w = max_nt_len));
1596 lines.push(format!("{:-<w$}-+-{:-<t$}", "", "", w = max_nt_len, t = *num_t + t_len.iter().sum::<usize>()));
1597 for i in 0..*num_nt {
1598 let mut line = format!("{:<w$} |", str_nt[i], w = max_nt_len);
1599 for j in 0..*num_t {
1600 let value = table[i * num_t + j];
1601 if value < error_skip {
1602 line.push_str(&format!(" {:^w$}", value, w = t_len[j]));
1603 } else {
1604 line.push_str(&format!(" {:^w$}", if value == error_pop { "p" } else { "." }, w = t_len[j]));
1605 }
1606 }
1607 lines.push(line);
1608 }
1609 lines
1610 }
1611
1612 pub fn print(&self, symbol_table: Option<&SymbolTable>, indent: usize) {
1613 let lines = self.to_str(symbol_table);
1614 for line in lines {
1615 println!("{:<1$}// {line}", "", indent);
1616 }
1617 }
1618}