1pub mod tests;
4pub mod origin;
5mod prs;
6
7pub use prs::*;
8
9use std::collections::{HashMap, HashSet};
10use std::fmt::{Display, Formatter};
11use std::marker::PhantomData;
12use std::mem::take;
13use std::ops::{Deref, DerefMut};
14use iter_index::IndexerIterator;
15use vectree::VecTree;
16use lexigram_core::alt::ruleflag;
17use lexigram_core::CollectJoin;
18use crate::cproduct::CProduct;
19use crate::{alt, gnode, hashset, indent_source, prule, sym, vaddi, General, Normalized, TokenId, VarId, LL1, LR};
20use crate::fixed_sym_table::SymInfoTable;
21use crate::grammar::NTConversion::{MovedTo, Removed};
22use crate::grammar::origin::{FromPRS, FromRTS, Origin};
23use lexigram_core::log::{BufLog, LogReader, LogStatus, Logger};
24use crate::build::{BuildErrorSource, BuildFrom, HasBuildErrorSource};
25use crate::parser::Symbol;
26use crate::SymbolTable;
27
28#[derive(Clone, Copy, PartialEq, Debug)]
29pub enum GrNode {
30 Symbol(Symbol),
31 Concat,
32 Or,
33 Maybe,
34 Plus,
35 Star,
36 LForm(VarId), RAssoc, PrecEq, Instance, }
68
69impl Display for GrNode {
70 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
71 match self {
72 GrNode::Symbol(s) => write!(f, "{s}"),
73 GrNode::Concat => write!(f, "&"),
74 GrNode::Or => write!(f, "|"),
75 GrNode::Maybe => write!(f, "?"),
76 GrNode::Plus => write!(f, "+"),
77 GrNode::Star => write!(f, "*"),
78 GrNode::LForm(v) => write!(f, "<L={v}>"),
79 GrNode::RAssoc => write!(f, "<R>"),
80 GrNode::PrecEq => write!(f, "<P>"),
81 GrNode::Instance => write!(f, "inst "),
82 }
83 }
84}
85
86impl GrNode {
87 pub fn to_str(&self, symbol_table: Option<&SymbolTable>) -> String {
88 match self {
89 GrNode::Symbol(s) => symbol_table.map(|t| t.get_str(s)).unwrap_or(s.to_string()),
90 GrNode::LForm(v) => format!("<L={}>", symbol_table.map(|t| t.get_str(&Symbol::NT(*v))).unwrap_or(v.to_string())),
91 _ => self.to_string()
92 }
93 }
94
95 pub fn gen_source_code(&self) -> String {
96 match self {
97 GrNode::Symbol(s) => format!("gnode!({})", s.to_macro_item()),
98 GrNode::Concat => "gnode!(&)".to_string(),
99 GrNode::Or => "gnode!(|)".to_string(),
100 GrNode::Maybe => "gnode!(?)".to_string(),
101 GrNode::Plus => "gnode!(+)".to_string(),
102 GrNode::Star => "gnode!(*)".to_string(),
103 GrNode::LForm(v) => format!("gnode!(L {v})"),
104 GrNode::RAssoc => "gnode!(R)".to_string(),
105 GrNode::PrecEq => "gnode!(P)".to_string(),
106 GrNode::Instance => "gnode!(inst)".to_string(),
107 }
108 }
109
110 pub fn is_modifier(&self) -> bool {
111 matches!(self, GrNode::LForm(_) | GrNode::RAssoc | GrNode::PrecEq)
112 }
113
114 pub fn is_empty(&self) -> bool {
115 matches!(self, GrNode::Symbol(Symbol::Empty))
116 }
117}
118
119#[derive(Clone, Copy)]
125pub struct Dup {
126 index: u32
127}
128
129#[derive(Clone, Copy, Debug)]
130enum DupVal {
131 Original(u32),
132 Copy(u32)
133}
134
135impl Dup {
136 const MASK: u32 = 1 << (u32::BITS - 1);
137
138 fn new(index: usize) -> Self {
139 assert!(index < Self::MASK as usize);
140 Self { index: index as u32 }
141 }
142
143 fn get(&mut self) -> DupVal {
144 let idx = self.index;
145 if idx & Self::MASK == 0 {
146 self.index |= Self::MASK;
147 DupVal::Original(idx)
148 } else {
149 DupVal::Copy(idx & !Self::MASK)
150 }
151 }
152}
153
154impl std::fmt::Debug for Dup {
155 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
156 write!(f, "Dup{{")?;
157 if self.index & Self::MASK == 0 {
158 write!(f, "{}}}", self.index)
159 } else {
160 write!(f, "copy {}}}", self.index & !Dup::MASK)
161 }
162 }
163}
164
165pub type GrTree = VecTree<GrNode>;
168
169pub fn grtree_to_str_custom(tree: &GrTree, node: Option<usize>, emphasis: Option<usize>, nt: Option<VarId>, symbol_table: Option<&SymbolTable>, simplified: bool, ansi: bool)
173 -> String
174{
175 fn pr_join(children: Vec<(u32, String)>, str: &str, pr: u32) -> (u32, String) {
176 (pr, children.into_iter()
177 .map(|(p_ch, ch)| if p_ch >= pr { ch } else { format!("({ch})") })
178 .join(str))
179 }
180
181 fn pr_append(child: (u32, String), str: &str, pr: u32) -> (u32, String) {
182 (pr, if child.0 >= pr { format!("{}{str}", child.1) } else { format!("({}){str}", child.1) })
183 }
184
185 const PR_PROD: u32 = 1;
186 const PR_TERM: u32 = 2;
187 const PR_FACTOR: u32 = 3;
188 const PR_ATOM: u32 = 4;
189
190 const BEFORE: &str = " ►► ";
191 const AFTER : &str = " ◄◄ ";
192 const BEFORE_ANSI: &str = "\u{1b}[4;1;36m";
193 const AFTER_ANSI : &str = "\u{1b}[0m";
194
195 let mut children = vec![];
196 if tree.is_empty() {
197 return "<empty>".to_string();
198 }
199 let before = if ansi { BEFORE_ANSI } else { BEFORE };
200 let after = if ansi { AFTER_ANSI } else { AFTER };
201 let top = node.unwrap_or_else(|| tree.get_root().unwrap());
202 for node in tree.iter_depth_simple_at(top) {
203 let (pr, mut str) = match node.num_children() {
204 0 => {
205 match node.deref() {
206 GrNode::Symbol(s) => (PR_ATOM, s.to_str_quote(symbol_table)),
207 GrNode::LForm(var) => (PR_ATOM, if simplified || nt == Some(*var) { "<L>".to_string() } else { format!("<L={}>", Symbol::NT(*var).to_str(symbol_table)) }),
208 GrNode::RAssoc => (PR_ATOM, "<R>".to_string()),
209 GrNode::PrecEq => (PR_ATOM, "<P>".to_string()),
210 s => panic!("{s:?} should have children"),
211 }
212 }
213 n => {
214 let mut node_children = children.split_off(children.len() - n);
215 match node.deref() {
216 GrNode::Concat => pr_join(node_children, " ", PR_TERM),
217 GrNode::Or => pr_join(node_children, " | ", PR_PROD),
218 GrNode::Maybe => pr_append(node_children.pop().unwrap(), "?", PR_FACTOR),
219 GrNode::Plus => pr_append(node_children.pop().unwrap(), "+", PR_FACTOR),
220 GrNode::Star => pr_append(node_children.pop().unwrap(), "*", PR_FACTOR),
221 GrNode::Instance => pr_join(node_children, " ", PR_FACTOR),
222 s => panic!("{s:?} shouldn't have {n} child(ren)"),
223 }
224 }
225 };
226 if Some(node.index) == emphasis {
227 str = format!("{before}{str}{after}");
228 }
229 children.push((pr, str));
230 }
231 children.pop().unwrap().1
232}
233
234pub fn grtree_to_str(tree: &GrTree, node: Option<usize>, emphasis: Option<usize>, var: Option<VarId>, symbol_table: Option<&SymbolTable>, simplified: bool) -> String {
235 grtree_to_str_custom(tree, node, emphasis, var, symbol_table, simplified, false)
236}
237
238pub fn grtree_to_str_ansi(tree: &GrTree, node: Option<usize>, emphasis: Option<usize>, var: Option<VarId>, symbol_table: Option<&SymbolTable>, simplified: bool) -> String {
239 grtree_to_str_custom(tree, node, emphasis, var, symbol_table, simplified, true)
240}
241
242fn grtree_cleanup(tree: &mut GrTree, top: Option<usize>, del_empty_term: bool) -> Option<(bool, bool)> {
263 const VERBOSE: bool = false;
264 let root = top.unwrap_or_else(|| tree.get_root().unwrap());
265 let mut had_empty_term = false;
266 let (terms, is_or) = match tree.get(root) {
267 GrNode::Symbol(s) => {
268 let is_empty = *s == Symbol::Empty;
269 return Some((is_empty, is_empty));
270 }
271 GrNode::Concat => (vec![root], false),
272 GrNode::Or => (tree.children(root).to_owned(), true),
273 GrNode::Maybe | GrNode::Plus | GrNode::Star | GrNode::LForm(_)
275 | GrNode::RAssoc | GrNode::PrecEq | GrNode::Instance => { return None }
276 };
277 let terms_len = terms.len();
278 let mut empty_terms = vec![];
279 for (term_pos, term) in terms.into_iter().enumerate() {
280 match *tree.get(term) {
281 GrNode::Concat => {
282 let children = tree.children(term);
283 let len = children.len();
284 let mut empty_pos = vec![];
285 let n_modifiers = children.iter().enumerate()
286 .fold(0, |n_mod, (pos, &index)| {
287 let n = tree.get(index);
288 if n.is_empty() {
289 empty_pos.push(pos);
290 }
291 n_mod + if n.is_modifier() { 1 } else { 0 }
292 });
293 if VERBOSE { print!("- term {} => {empty_pos:?} empty, {n_modifiers} modifier", tree.to_str_index(Some(term), None)); }
294 if empty_pos.len() + n_modifiers == len {
295 *tree.get_mut(term) = gnode!(e);
296 tree.children_mut(term).clear();
297 empty_terms.push(term_pos);
298 if VERBOSE { println!(" (replacing everything with ε)"); }
299 } else if !empty_pos.is_empty() {
300 if VERBOSE { println!(" => (removing {} ε)", empty_pos.len()); }
301 let new_children = tree.children_mut(term);
302 for i in empty_pos.into_iter().rev() {
303 new_children.remove(i);
304 }
305 } else {
306 if VERBOSE { println!(" (nothing to do)"); }
307 }
308 }
309 GrNode::Symbol(Symbol::Empty) => {
310 empty_terms.push(term_pos);
311 }
312 n if n.is_modifier() => { *tree.get_mut(term) = gnode!(e);
314 empty_terms.push(term_pos);
315 }
316 _ => {}
317 }
318 }
319 if VERBOSE { println!(" {} empty terms: {empty_terms:?}", empty_terms.len()); }
320 if !empty_terms.is_empty() {
321 had_empty_term = true;
322 if is_or {
323 if !del_empty_term && terms_len > 1 || empty_terms.len() == terms_len {
324 empty_terms.pop();
325 }
326 let or_children = tree.children_mut(root);
327 for i in empty_terms.into_iter().rev() {
328 or_children.remove(i);
329 }
330 if VERBOSE { println!("or_children => {or_children:?}"); }
331 }
332 }
333 if is_or && tree.children(root).len() == 1 {
334 let subroot_index = tree.children(root)[0];
336 let subroot = *tree.get(subroot_index);
337 let subroot_children = tree.children(subroot_index).to_owned();
338 *tree.get_mut(root) = subroot;
339 *tree.children_mut(root) = subroot_children;
340 }
341 let is_empty = match tree.get(root) {
342 GrNode::Symbol(s) => s.is_empty(),
343 GrNode::Concat => false, GrNode::Or => false, GrNode::Maybe | GrNode::Plus | GrNode::Star | GrNode::LForm(_) | GrNode::RAssoc | GrNode::PrecEq | GrNode::Instance => false,
346 };
347 Some((is_empty, had_empty_term))
348}
349
350fn remove_concat_dup_empty(tree: &GrTree, nodes: &mut Vec<usize>) {
357 if nodes.len() > 1 {
358 let mut i = 0;
359 while i < nodes.len() && nodes.len() > 1 {
360 if tree.get(nodes[i]).is_empty() {
361 nodes.remove(i);
362 } else {
363 i += 1;
364 }
365 }
366 }
367}
368
369pub trait GrTreeExt {
374 fn get_dup(&mut self, dup_index: &mut Dup) -> usize;
375 fn to_str(&self, start_node: Option<usize>, symbol_table: Option<&SymbolTable>) -> String;
376 fn to_str_index(&self, start_node: Option<usize>, symbol_table: Option<&SymbolTable>) -> String;
377}
378
379impl GrTreeExt for GrTree {
380 fn get_dup(&mut self, dup_index: &mut Dup) -> usize {
381 match dup_index.get() {
382 DupVal::Original(index) => index as usize,
383 DupVal::Copy(index) => {
384 let node = self.get(index as usize).clone();
385 self.add(None, node)
386 }
387 }
388 }
389
390 fn to_str(&self, start_node: Option<usize>, symbol_table: Option<&SymbolTable>) -> String {
391 let tfmt = GrTreeFmt {
392 tree: &self,
393 show_ids: false,
394 show_depth: false,
395 symbol_table,
396 start_node,
397 };
398 tfmt.to_string()
399 }
400
401 fn to_str_index(&self, start_node: Option<usize>, symbol_table: Option<&SymbolTable>) -> String {
402 let tfmt = GrTreeFmt {
403 tree: &self,
404 show_ids: true,
405 show_depth: false,
406 symbol_table,
407 start_node,
408 };
409 tfmt.to_string()
410 }
411}
412
413pub struct GrTreeFmt<'a> {
414 tree: &'a GrTree,
415 show_ids: bool,
416 show_depth: bool,
417 symbol_table: Option<&'a SymbolTable>,
418 start_node: Option<usize>
419}
420
421impl<'a> GrTreeFmt<'a> {
422 fn snode(&self, node: &GrNode, node_id: usize, depth: u32) -> String {
423 let show_ids = self.show_ids;
424 let show_depth = self.show_depth;
425 let mut result = String::new();
426 if show_depth {
427 result.push_str(&depth.to_string());
428 result.push('>');
429 }
430 if show_ids {
431 result.push_str(&node_id.to_string());
432 result.push(':');
433 }
434 let name = if let GrNode::Symbol(sym) = node {
435 if self.symbol_table.is_some() { sym.to_str_quote(self.symbol_table) } else { sym.to_str(self.symbol_table) }
436 } else {
437 node.to_str(self.symbol_table)
438 };
439 result.push_str(name.as_str());
440 result
441 }
442}
443
444impl Display for GrTreeFmt<'_> {
445 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
446 if self.tree.is_empty() {
447 return write!(f, "<empty>");
448 }
449 let start_node = self.start_node.unwrap_or_else(|| self.tree.get_root().expect("the tree must have a defined root"));
450 let mut stack = Vec::<String>::new();
451 for node in self.tree.iter_depth_at(start_node) {
452 let n = node.num_children();
453 if n > 0 {
454 let children = stack.drain(stack.len() - n..).join(", ");
455 stack.push(format!("{}({children})", self.snode(&node, node.index, node.depth)));
456 } else {
457 stack.push(self.snode(&node, node.index, node.depth));
458 }
459 }
460 write!(f, "{}", stack.pop().unwrap_or_else(|| "empty".to_string()))
461 }
462}
463
464#[derive(Clone, Copy, PartialEq, Debug)]
467pub enum NTConversion {
468 Removed,
470 MovedTo(VarId)
472}
473
474#[derive(Clone, Debug)]
475pub struct RuleTreeSet<T> {
476 trees: Vec<GrTree>,
477 start: Option<VarId>,
478 symbol_table: Option<SymbolTable>,
479 flags: Vec<u32>, parent: Vec<Option<VarId>>, nt_conversion: HashMap<VarId, NTConversion>,
482 origin: Origin<(VarId, usize), FromRTS>,
483 log: BufLog,
484 _phantom: PhantomData<T>
485}
486
487impl<T> HasBuildErrorSource for RuleTreeSet<T> {
488 const SOURCE: BuildErrorSource = BuildErrorSource::RuleTreeSet;
489}
490
491impl<T> RuleTreeSet<T> {
494 pub fn get_num_nt(&self) -> VarId {
495 self.trees.len() as VarId
496 }
497
498 pub fn get_tree(&self, var: VarId) -> Option<&GrTree> {
499 self.trees.get(var as usize)
500 }
501
502 pub fn get_trees_iter(&self) -> impl Iterator<Item=(VarId, &GrTree)> {
504 self.trees.iter().index().filter_map(|(id, t)| if t.is_empty() { None } else { Some((id, t)) })
505 }
506
507 pub fn get_vars(&self) -> impl Iterator<Item=VarId> + '_ {
509 (0..self.trees.len()).filter_map(|id| if self.trees[id].is_empty() { None } else { Some(id as VarId) })
510 }
511
512 pub fn get_next_available_var(&self) -> VarId {
514 self.trees.len() as VarId
515 }
516
517 pub fn get_terminals(&self) -> HashSet<TokenId> {
519 let tset = self.trees.iter()
520 .flat_map(|t| t.iter_depth_simple())
521 .filter_map(|x| if let GrNode::Symbol(Symbol::T(t)) = x.deref() { Some(*t) } else { None })
522 .collect::<HashSet<_>>();
523 tset
524 }
525
526 pub fn set_symbol_table(&mut self, symbol_table: SymbolTable) {
527 self.symbol_table = Some(symbol_table);
528 }
529
530 pub fn get_symbol_table(&self) -> Option<&SymbolTable> {
531 self.symbol_table.as_ref()
532 }
533
534 pub fn set_start(&mut self, start: VarId) {
536 self.start = Some(start);
537 }
538
539 pub fn to_str(&self, var: VarId, node: Option<usize>, emphasis: Option<usize>) -> String {
543 grtree_to_str(&self.trees[var as usize], node, emphasis, Some(var), self.get_symbol_table(), false)
544 }
545
546 pub fn get_log_mut(&mut self) -> &mut BufLog {
547 &mut self.log
548 }
549}
550
551impl<T> LogReader for RuleTreeSet<T> {
552 type Item = BufLog;
553
554 fn get_log(&self) -> &Self::Item {
555 &self.log
556 }
557
558 fn give_log(self) -> Self::Item {
559 self.log
560 }
561}
562
563impl RuleTreeSet<General> {
565 pub fn new() -> Self {
566 Self::with_log(BufLog::new())
567 }
568
569 pub fn with_log(log: BufLog) -> Self {
570 RuleTreeSet {
571 trees: Vec::new(),
572 start: None,
573 symbol_table: None,
574 flags: Vec::new(),
575 parent: Vec::new(),
576 nt_conversion: HashMap::new(),
577 origin: Origin::new(),
578 log,
579 _phantom: PhantomData,
580 }
581 }
582
583 pub fn get_tree_mut(&mut self, var: VarId) -> &mut GrTree {
585 let var = var as usize;
586 if var >= self.trees.len() {
587 self.trees.resize(var + 1, GrTree::new());
588 }
589 &mut self.trees[var]
590 }
591
592 pub fn set_tree(&mut self, var: VarId, tree: GrTree) {
595 let var = var as usize;
596 if var >= self.trees.len() {
597 if var > self.trees.len() {
598 if self.trees.capacity() < var + 1 {
599 self.trees.reserve(var + 1 - self.trees.capacity())
601 }
602 self.trees.resize(var, GrTree::new());
603 }
604 self.trees.push(tree);
605 } else {
606 self.trees[var] = tree;
607 }
608 }
609
610 pub fn normalize(&mut self) {
612 self.log.add_note("original rules:");
613 (0..self.trees.len() as VarId)
614 .for_each(|v| self.log.add_note(format!("- NT[{v:2}] {} -> {}", Symbol::NT(v).to_str(self.get_symbol_table()), self.to_str(v, None, None))));
615 self.log.add_note("normalizing rules...");
616 self.check_num_nt_coherency();
617 self.normalize_vars();
618 self.flags.resize(self.trees.len(), 0);
619 self.parent.resize(self.trees.len(), None);
620 }
621
622 fn check_num_nt_coherency(&mut self) {
623 if let Some(n) = self.symbol_table.as_ref().and_then(|table| Some(table.get_num_nt())) {
624 if n != self.trees.len() {
625 self.log.add_error(format!("there are {} rules but the symbol table has {n} nonterminal symbols: dropping the table", self.trees.len()));
626 self.symbol_table = None;
627 }
628 }
629 }
630
631 fn normalize_vars(&mut self) {
638 const VERBOSE: bool = false;
639 const VERBOSE_CC: bool = false;
640 let mut exclude_nt = HashSet::<VarId>::new();
641 for var in 0..self.get_num_nt() {
642 if exclude_nt.contains(&var) {
643 if VERBOSE { println!("NT[{var}] {} excluded", Symbol::NT(var).to_str(self.get_symbol_table())); }
644 continue
645 }
646 if VERBOSE { println!("normalize_var(NT[{var}] {})", Symbol::NT(var).to_str(self.get_symbol_table())); }
647 let mut new_var = self.get_next_available_var();
648 let orig = take(&mut self.trees[var as usize]);
649 let mut new = GrTree::new();
650 let mut orig_new = GrTree::new();
651 let mut orig_rep_vars = HashMap::<VarId, usize>::new();
652 let mut stack = Vec::<usize>::new(); for sym in orig.iter_depth() {
654 let n = sym.num_children();
655 if VERBOSE { println!("- old {}:{}", sym.index, *sym); }
656 if n == 0 {
657 let new_id = new.add(None, orig.get(sym.index).clone());
658 stack.push(new_id);
659 if VERBOSE { print!(" leaf: "); }
660 } else {
661 match *sym {
662 GrNode::Concat | GrNode::Or => {
668 let children = stack.split_off(stack.len() - n);
669 let new_id = if children.iter().all(|&idx| !matches!(new.get(idx), GrNode::Concat|GrNode::Or)) {
670 if VERBOSE { print!(" trivial {}: children={}\n ", *sym, children.iter().map(|s| new.get(*s).to_str(self.get_symbol_table())).join(", ")); }
671 new.addci_iter(None, sym.clone(), children)
673 } else {
674 if *sym == GrNode::Or {
675 if VERBOSE {
676 println!(" or: children={}", children.iter().map(|&id| format!("{}", new.to_str_index(Some(id), self.get_symbol_table()))).join(", "));
678 }
679 let mut new_children = Vec::new();
687 for id in children {
688 match new.get(id) {
689 GrNode::Symbol(_) | GrNode::Concat => {
690 if VERBOSE { println!(" - child {id} is {}", new.get(id)); }
691 new_children.push(id);
692 }
693 GrNode::Or => {
694 if VERBOSE { println!(" - child {id} is | with children {:?}", new.children(id)); }
695 new_children.extend(new.children(id));
696 }
697 x => panic!("unexpected node type under | node: {x}"),
698 }
699 }
700 new.addci_iter(None, gnode!(|), new_children)
701 } else { if VERBOSE_CC { println!(" &: children={children:?}"); }
703 let mut dups = Vec::<Vec<Dup>>::new();
715 let concats_children = children.into_iter()
716 .flat_map(|id| {
718 if VERBOSE_CC { print!(" FL {}: ", new.get(id)); }
719 match new.get(id) {
720 GrNode::Concat =>
721 new.children(id).iter().map(|idc| vec![vaddi(&mut dups, [Dup::new(*idc)])]).to_vec(),
722 GrNode::Or => {
723 let children = new.children(id).to_vec();
724 vec![children.into_iter().map(|idc| {
725 if let GrNode::Concat = new.get(idc) {
726 let idc_children = new.children(idc).iter().map(|i| Dup::new(*i)).to_vec();
727 vaddi(&mut dups, idc_children)
728 } else {
729 vaddi(&mut dups, [Dup::new(idc)])
730 }
731 }).to_vec()]
732 }
733 _ => vec![vec![vaddi(&mut dups, [Dup::new(id)])]],
734 }
735 })
736 .cproduct()
739 .map(|dup_ids| {
743 let mut nodes = dup_ids.into_iter()
744 .flat_map(|dup_id| dups.get_mut(dup_id).unwrap().iter_mut()
745 .map(|dup| new.get_dup(dup)).to_vec()).to_vec();
746 remove_concat_dup_empty(&new, &mut nodes);
747 nodes
748 })
749 .to_vec();
751 let concats = concats_children.into_iter()
753 .map(|children_ids| new.addci_iter(None, gnode!(&), children_ids))
754 .to_vec();
755 new.addci_iter(None, gnode!(|), concats)
757 }
758 };
759 stack.push(new_id);
760 }
761 GrNode::Maybe => {
762 if n != 1 {
763 self.log.add_error(format!("normalize_var({}): ? should only have one child; found {n}: {}",
764 Symbol::NT(var).to_str(self.get_symbol_table()),
765 orig.to_str(Some(sym.index), self.get_symbol_table())));
766 } else {
767 if VERBOSE { print!(" ?: "); }
773 let maybe_child = stack.pop().unwrap();
774 let proceed = match grtree_cleanup(&mut new, Some(maybe_child), true) {
775 None => {
776 self.log.add_error(format!(
777 "unexpected child of ?: {} (should be &, |, or symbol)",
778 grtree_to_str(&new, Some(maybe_child), None, Some(var), self.get_symbol_table(), false)));
779 if VERBOSE { println!("ERROR: unexpected child of ?: {}", grtree_to_str(&new, Some(maybe_child), None, Some(var), self.get_symbol_table(), false)); }
780 return;
781 }
782 Some((true, _)) => {
784 stack.push(maybe_child);
786 if VERBOSE { println!("child of ? simplified to ε"); }
787 false
788 }
789 _ => true,
790 };
791 if proceed {
792 let empty = new.add(None, gnode!(e));
793 let id = match new.get(maybe_child) {
794 GrNode::Or => {
795 new.add(Some(maybe_child), gnode!(e));
796 maybe_child
797 }
798 _ => new.addci_iter(None, gnode!(|), [maybe_child, empty])
799 };
800 stack.push(id);
801 }
802 }
803 }
804 GrNode::Plus | GrNode::Star => {
805 let mut is_plus = *sym == GrNode::Plus;
825 let sym_char = if is_plus { '+' } else { '*' };
826 if VERBOSE { print!(" {sym_char}: "); }
827 if n != 1 {
828 self.log.add_error(format!(
829 "normalize_var({}): {sym_char} should only have one child; found {n}: {}",
830 Symbol::NT(var).to_str(self.get_symbol_table()),
831 orig.to_str(Some(sym.index), self.get_symbol_table())));
832 if VERBOSE { println!("ERROR: found {n} children instead of 1"); }
833 return;
834 }
835 let rep_child = stack.pop().unwrap();
836 if VERBOSE {}
837 let proceed = match grtree_cleanup(&mut new, Some(rep_child), true) {
838 None => {
839 self.log.add_error(format!(
840 "unexpected child of {sym_char}: {} (should be &, |, or symbol)",
841 grtree_to_str(&new, Some(rep_child), None, Some(var), self.get_symbol_table(), false)));
842 if VERBOSE { println!("ERROR: unexpected child {}", grtree_to_str(&new, Some(rep_child), None, Some(var), self.get_symbol_table(), false)); }
843 return;
844 }
845 Some((true, _)) => {
847 stack.push(rep_child);
849 if VERBOSE { println!("child simplified to ε"); }
850 false
851 }
852 Some((false, true)) => {
853 if is_plus {
857 is_plus = false;
858 if VERBOSE { print!(" becomes * (child lost ε term), "); }
860 }
861 true
862 }
863 Some((false, false)) => true, };
865 if proceed {
866 if VERBOSE { println!("=> {}", grtree_to_str(&new, Some(rep_child), None, Some(var), self.get_symbol_table(), false)); }
867 let orig_rep = orig_new.add(None, if is_plus { gnode!(+) } else { gnode!(*) });
868 let orig_rep_child = orig_new.add_from_tree(Some(orig_rep), &new, Some(rep_child));
869 let (id, qvar) = self.normalize_plus_or_star(
870 rep_child, orig_rep, orig_rep_child, &mut new, &mut orig_new, var, &mut new_var, is_plus, &mut exclude_nt);
871 stack.push(id);
872 orig_rep_vars.insert(qvar, orig_rep); }
874 }
875 _ => panic!("Unexpected {}", sym.deref())
876 }
877 }
878 if VERBOSE {
879 println!("stack: {}", stack.iter()
880 .map(|id| {
881 let children = new.children(*id);
882 format!("{id}:{}{}", new.get(*id), if children.is_empty() { "".to_string() } else { format!("({})", children.iter().join(",")) })
883 }).join(", ")
884 );
885 }
886 }
887 if stack.len() != 1 {
888 self.log.add_error(format!("normalize_var({}): error while normalizing the rules, {} remaining nodes instead of 1",
889 Symbol::NT(var).to_str(self.get_symbol_table()), stack.len()));
890 return;
891 }
892 if VERBOSE_CC { println!("Final stack id: {}", stack[0]); }
893 let root = stack.pop().unwrap();
894 new.set_root(root);
895 match grtree_cleanup(&mut new, None, false) {
896 None => {
897 self.log.add_error(format!(
898 "unexpected root of {} -> {} (should be &, |, or symbol)",
899 Symbol::NT(var).to_str(self.get_symbol_table()),
900 grtree_to_str(&new, None, None, Some(var), self.get_symbol_table(), false)));
901 if VERBOSE { println!("ERROR: unexpected root {}", grtree_to_str(&new, None, None, Some(var), self.get_symbol_table(), false)); }
902 }
903 Some((true, _)) => {
905 self.log.add_warning(format!("{} is empty", Symbol::NT(var).to_str(self.get_symbol_table())));
906 }
907 _ => {}
908 }
909 let orig_root = orig_new.add_from_tree_callback(None, &new, None, |from, to, _| self.origin.add((var, to), (var, from)));
910 orig_new.set_root(orig_root);
911 while !orig_rep_vars.is_empty() {
912 let mut orig_rep_nodes = Vec::<(usize, usize)>::new();
919 let mut to_remove = Vec::<VarId>::new();
920 for node in orig_new.iter_depth() {
921 if let GrNode::Symbol(Symbol::NT(rep_var)) = node.deref() {
922 if let Some(&orig_rep_id) = orig_rep_vars.get(&rep_var) {
923 to_remove.push(*rep_var);
924 orig_rep_nodes.push((node.index, orig_rep_id));
925 self.origin.add((*rep_var, self.get_tree(*rep_var).unwrap().get_root().unwrap()), (var, orig_rep_id));
926 }
927 }
928 }
929 for (orig_id, child_id) in orig_rep_nodes {
930 *orig_new.get_mut(orig_id) = gnode!(inst);
931 orig_new.attach_child(orig_id, child_id);
932 }
933 for var in to_remove {
934 orig_rep_vars.remove(&var);
935 }
936 }
937 self.origin.set_tree(var, orig_new);
938 self.set_tree(var, new);
939 }
940 }
941
942 fn normalize_plus_or_star(
943 &mut self, rep_child: usize, orig_rep: usize, orig_rep_child: usize,
944 new: &mut GrTree, orig_new: &mut GrTree, var: VarId, new_var: &mut VarId, is_plus: bool,
945 exclude_nt: &mut HashSet<VarId>
946 ) -> (usize, VarId)
947 {
948 const VERBOSE: bool = false;
949 const OPTIMIZE_SUB_OR: bool = false;
950 self.symbol_table.as_ref().map(|st| assert_eq!(st.get_num_nt(), self.trees.len(), "number of nt in symbol table doesn't match num_nt"));
951 let (mut qvar, mut rvar) = (*new_var, *new_var + 1);
952 let mut qtree = GrTree::new();
953 let mut rtree = GrTree::new();
954 let mut use_rtree = false;
955
956 let mut lform_nt = None;
958 for node in orig_new.iter_depth_at(orig_rep_child) {
959 if let GrNode::LForm(v) = *node {
960 if matches!(lform_nt, Some(v2) if v != v2) {
961 let symtab = self.get_symbol_table();
962 self.log.add_error(
963 format!("in {}, {}: conflicting <L={}> and <L={}>",
964 Symbol::NT(var).to_str(symtab),
965 grtree_to_str(orig_new, Some(orig_rep), None, Some(var), symtab, false),
966 Symbol::NT(lform_nt.unwrap()).to_str(symtab), Symbol::NT(v).to_str(symtab)));
967 } else {
968 lform_nt = Some(v);
969 (qvar, rvar) = (v, *new_var);
970 }
971 }
972 }
973
974 match orig_new.get(orig_rep_child) {
977 GrNode::Symbol(s) => {
978 if VERBOSE { print!("({rep_child}:{s}) "); }
979 let or = qtree.add_root(gnode!(|));
981 let cc = qtree.add(Some(or), gnode!(&));
982 let child = qtree.add(Some(cc), GrNode::Symbol(s.clone()));
983 qtree.add(Some(cc), gnode!(nt qvar));
984 let child2 = qtree.add(Some(or), if is_plus { GrNode::Symbol(s.clone()) } else { gnode!(e) });
985 self.origin.add((qvar, child), (var, orig_rep_child)); self.origin.add((qvar, cc), (var, orig_rep_child));
987 if is_plus {
988 self.origin.add((qvar, child2), (var, orig_rep_child));
989 }
990 }
991 GrNode::Concat => {
992 let id_grchildren = new.children(rep_child);
993 if VERBOSE { print!("({rep_child}:&({})) ", id_grchildren.iter().join(", ")); }
994 let or = qtree.add_root(gnode!(|));
995 let cc1 = qtree.add_from_tree_callback(Some(or), orig_new, Some(orig_rep_child), |to, from, _n| {
996 self.origin.add((qvar, to), (var, from))
997 });
998 let loop_id = qtree.add(Some(cc1), gnode!(nt qvar));
999 self.origin.add((qvar, loop_id), (var, orig_rep));
1000 if is_plus {
1001 let loop_id2 = qtree.add_from_tree(Some(or), &new, Some(rep_child));
1002 self.origin.add((qvar, loop_id2), (var, orig_rep_child));
1003 } else {
1004 qtree.add(Some(or), gnode!(e));
1005 }
1006 }
1007 #[allow(unreachable_patterns)]
1008 GrNode::Or => if !OPTIMIZE_SUB_OR {
1009 let id_grchildren = new.children(rep_child);
1010 if VERBOSE { print!("({rep_child}:|({})) ", id_grchildren.iter().join(", ")); }
1011 let orig_id_grchildren = orig_new.children(orig_rep_child);
1012 let or = qtree.add_root(gnode!(|));
1013 for orig_id_grchild in orig_id_grchildren {
1014 let orig_grchild = orig_new.get(*orig_id_grchild);
1015 match orig_grchild {
1016 GrNode::Symbol(s) => {
1017 let cc = qtree.add(Some(or), gnode!(&));
1018 let child = qtree.add_iter(Some(cc), [GrNode::Symbol(s.clone()), gnode!(nt qvar)])[0];
1019 self.origin.add((qvar, cc), (var, *orig_id_grchild));
1020 self.origin.add((qvar, child), (var, *orig_id_grchild));
1021 if is_plus {
1022 let plus_or = qtree.add(Some(or), GrNode::Symbol(s.clone()));
1023 self.origin.add((qvar, plus_or), (var, *orig_id_grchild));
1024 }
1025 }
1026 GrNode::Concat => {
1027 let cc = qtree.add_from_tree_callback(Some(or), orig_new, Some(*orig_id_grchild), |to, from, _n| {
1028 self.origin.add((qvar, to), (var, from));
1029 });
1030 qtree.add(Some(cc), gnode!(nt qvar));
1031 if is_plus {
1032 qtree.add_from_tree_callback(Some(or), &orig_new, Some(*orig_id_grchild), |to, from, _| {
1033 self.origin.add((qvar, to), (var, from));
1034 });
1035 }
1036 }
1037 x => panic!("unexpected node type under a | node: {x}"),
1038 }
1039 }
1040 if !is_plus {
1041 qtree.add(Some(or), gnode!(e));
1042 }
1043 }
1044 #[allow(unreachable_patterns)]
1046 GrNode::Or => if OPTIMIZE_SUB_OR {
1047 let id_grchildren = new.children(rep_child);
1060 if VERBOSE { print!("({rep_child}:|({})) ", id_grchildren.iter().join(", ")); }
1061 let orig_id_grchildren = orig_new.children(orig_rep_child);
1062 let or = qtree.add_root(gnode!(|));
1063 for orig_id_grchild in orig_id_grchildren {
1064 let orig_grchild = new.get(*orig_id_grchild);
1065 match orig_grchild {
1066 GrNode::Symbol(s) => {
1067 if is_plus {
1068 qtree.addc_iter(Some(or), gnode!(&), [GrNode::Symbol(s.clone()), gnode!(nt rvar)]);
1069 use_rtree = true;
1070 } else {
1071 qtree.addc_iter(Some(or), gnode!(&), [GrNode::Symbol(s.clone()), gnode!(nt qvar)]);
1072 }
1073 }
1074 GrNode::Concat => {
1075 let cc = qtree.add_from_tree_callback(Some(or), orig_new, Some(*orig_id_grchild), |to, from, _n| {
1076 self.origin.add((qvar, to), (var, from));
1077 });
1078 if is_plus {
1079 qtree.add(Some(cc), gnode!(nt rvar));
1080 } else {
1081 qtree.add(Some(cc), gnode!(nt qvar));
1082 }
1083 }
1084 x => panic!("unexpected node type under a | node: {x}"),
1085 }
1086 }
1087 if use_rtree {
1088 let or1 = rtree.add_root(gnode!(|));
1089 rtree.add_iter(Some(or1), [gnode!(nt qvar), gnode!(e)]);
1090 } else if !is_plus {
1091 qtree.add(Some(or), gnode!(e));
1092 }
1093 }
1094 _ => panic!("Unexpected node type under a + node: {}", new.get(rep_child))
1095 }
1096 if let Some(v) = lform_nt {
1097 if v == var {
1099 self.log.add_error(
1100 format!("in {}, {}: <L> points to the same nonterminal. It must be a new one, created for the loop.",
1101 Symbol::NT(var).to_str(self.get_symbol_table()),
1102 grtree_to_str(orig_new, Some(orig_rep), None, Some(var), self.get_symbol_table(), false)));
1103 } else {
1104 for mut node in qtree.iter_depth_simple_mut() {
1106 if let GrNode::LForm(v2) = node.deref_mut() {
1107 if *v2 == v { *v2 = qvar; }
1108 }
1109 }
1110 for mut node in orig_new.iter_depth_simple_at_mut(orig_rep_child) {
1111 if let GrNode::LForm(v2) = node.deref_mut() {
1112 if *v2 == v { *v2 = qvar; }
1113 }
1114 }
1115 }
1116 }
1117 self.symbol_table.as_mut().map(|st| {
1118 if let Some(_v) = lform_nt {
1119 } else {
1125 assert_eq!(st.add_child_nonterminal(var), qvar);
1126 }
1127 if use_rtree {
1128 assert_eq!(st.add_child_nonterminal(var), rvar);
1129 }
1130 });
1131 let id = new.add(None, gnode!(nt qvar));
1132 assert!(qvar as usize >= self.trees.len() || self.trees[qvar as usize].is_empty(), "overwriting tree {new_var}");
1133 if VERBOSE { println!("qtree: NT[{qvar}] {} -> {}", Symbol::NT(qvar).to_str(self.get_symbol_table()), grtree_to_str(&qtree, None, None, Some(qvar), self.get_symbol_table(), false) ); }
1134 self.set_tree(qvar, qtree);
1135 exclude_nt.insert(qvar);
1136 self.flags.resize(rvar as usize, 0);
1137 self.parent.resize(rvar as usize, None);
1138 let plus_flag = if is_plus { ruleflag::REPEAT_PLUS } else { 0 };
1139 self.flags[qvar as usize] = ruleflag::CHILD_REPEAT | plus_flag;
1140 self.flags[var as usize] |= ruleflag::PARENT_REPEAT | plus_flag;
1141 self.parent[qvar as usize] = Some(var);
1142 if use_rtree {
1143 if VERBOSE { println!("rtree: NT[{rvar}] {} -> {}", Symbol::NT(rvar).to_str(self.get_symbol_table()), grtree_to_str(&rtree, None, None, Some(rvar), self.get_symbol_table(), false)); }
1144 self.set_tree(rvar, rtree);
1145 exclude_nt.insert(var);
1146 self.flags.resize(rvar as usize + 1, 0);
1147 self.parent.resize(rvar as usize + 1, None);
1148 self.flags[rvar as usize] |= ruleflag::CHILD_L_FACT;
1149 self.parent[rvar as usize] = Some(qvar);
1150 self.flags[qvar as usize] |= ruleflag::PARENT_L_FACTOR;
1151 }
1152 if VERBOSE {
1153 println!("=> new sizes, flags = {}, parent = {}, trees = {} (new_var = {new_var})", self.flags.len(), self.parent.len(), self.trees.len());
1154 println!("=> {}: parent {}, child {}{}",
1155 if is_plus { "+" } else { "*" },
1156 Symbol::NT(var).to_str(self.get_symbol_table()),
1157 Symbol::NT(qvar).to_str(self.get_symbol_table()),
1158 if use_rtree { format!(", child {}", Symbol::NT(rvar).to_str(self.get_symbol_table())) } else { String::new() }
1159 );
1160 }
1161 let mut rectify_maybe = None;
1169 for node in self.get_tree(qvar).unwrap().iter_depth_simple() {
1170 if let GrNode::Symbol(Symbol::NT(child)) = node.deref() {
1171 if *child != qvar && self.flags[*child as usize] & ruleflag::CHILD_REPEAT != 0 {
1172 rectify_maybe = Some(*child);
1173 break;
1174 }
1175 }
1176 }
1177 if let Some(child) = rectify_maybe {
1178 self.parent[child as usize] = Some(qvar);
1179 self.flags[qvar as usize] |= ruleflag::PARENT_REPEAT;
1180 if VERBOSE {
1181 println!("=> rectify {}'s parent as {}",
1182 Symbol::NT(child).to_str(self.get_symbol_table()),
1183 Symbol::NT(qvar).to_str(self.get_symbol_table()));
1184 }
1185 }
1186 *new_var = self.get_next_available_var();
1187 (id, qvar)
1188 }
1189}
1190
1191impl BuildFrom<RuleTreeSet<General>> for RuleTreeSet<Normalized> {
1192 fn build_from(mut rules: RuleTreeSet<General>) -> Self {
1197 if rules.log.has_no_errors() {
1201 rules.normalize();
1202 }
1203 RuleTreeSet::<Normalized> {
1204 trees: rules.trees,
1205 start: rules.start,
1206 symbol_table: rules.symbol_table,
1207 flags: rules.flags,
1208 parent: rules.parent,
1209 nt_conversion: rules.nt_conversion,
1210 origin: rules.origin,
1211 log: rules.log,
1212 _phantom: PhantomData
1213 }
1214 }
1215}
1216
1217pub mod macros {
1228 #[macro_export]
1248 macro_rules! gnode {
1249 ([$id:expr]) => { gnode!(t $id) };
1250 (t $id:expr) => { $crate::grammar::GrNode::Symbol($crate::parser::Symbol::T($id as $crate::TokenId)) };
1251 (nt $id:expr) => { $crate::grammar::GrNode::Symbol($crate::parser::Symbol::NT($id as $crate::VarId)) };
1252 (e) => { $crate::grammar::GrNode::Symbol($crate::parser::Symbol::Empty) };
1253 (end) => { $crate::grammar::GrNode::Symbol($crate::parser::Symbol::End) };
1254 (&) => { $crate::grammar::GrNode::Concat };
1256 (|) => { $crate::grammar::GrNode::Or };
1257 (?) => { $crate::grammar::GrNode::Maybe };
1258 (+) => { $crate::grammar::GrNode::Plus };
1259 (*) => { $crate::grammar::GrNode::Star };
1260 (L $id:expr) => { $crate::grammar::GrNode::LForm($id) };
1261 (R) => { $crate::grammar::GrNode::RAssoc };
1262 (P) => { $crate::grammar::GrNode::PrecEq };
1263 (inst) => { $crate::grammar::GrNode::Instance };
1264 }
1265
1266 #[macro_export]
1277 macro_rules! sym {
1278 (t $id:expr) => { $crate::parser::Symbol::T($id as $crate::TokenId) };
1279 (nt $id:expr) => { $crate::parser::Symbol::NT($id as $crate::VarId) };
1280 (e) => { $crate::parser::Symbol::Empty };
1281 (end) => { $crate::parser::Symbol::End };
1282 }
1283
1284 #[macro_export]
1285 macro_rules! altflag {
1286 (L) => { $crate::alt::ruleflag::L_FORM };
1287 (R) => { $crate::alt::ruleflag::R_ASSOC };
1288 (G) => { $crate::alt::ruleflag::GREEDY };
1289 (P) => { $crate::alt::ruleflag::PREC_EQ };
1290 ($f:expr) => { $f };
1291 }
1292
1293 #[macro_export]
1316 macro_rules! alt {
1317 () => { $crate::alt::Alternative::new(std::vec![]) };
1318 ($($a:ident $($b:expr)?,)+) => { alt!($($a $($b)?),+) };
1319 ($($a:ident $($b:expr)?),*) => { $crate::alt::Alternative::new(std::vec![$($crate::sym!($a $($b)?)),*]) };
1320 (#$f:literal, $($a:ident $($b:expr)?,)+) => { alt!(#$f, $($a $($b)?),+) };
1321 (#$f:literal, $($a:ident $($b:expr)?),*) => { $crate::alt::Alternative::new(std::vec![$($crate::sym!($a $($b)?)),*]).with_flags($f) };
1322 ($(#$f:ident,)? $(%($v:expr, $id:expr),)? $($a:ident $($b:expr)?,)+) => { alt!($(#$f,)? $(%($v, $id),)? $($a $($b)?),+) };
1326 ($(#$f:ident,)? $(%($v:expr, $id:expr),)? $($a:ident $($b:expr)?),*)
1327 => { $crate::alt::Alternative::new(std::vec![$($crate::sym!($a $($b)?)),*])$(.with_flags($crate::altflag!($f)))?$(.with_origin($v, $id))? };
1328 (#($f:expr, $o:expr), $(%($v:expr, $id:expr),)? $($a:ident $($b:expr)?,)+)
1330 => { alt!(#($f, $o), $(%($v, $id),)? $($a $($b)?),+) };
1331 (#($f:expr, $o:expr), $(%($v:expr, $id:expr),)? $($a:ident $($b:expr)?),*)
1332 => { $crate::alt::Alternative::new(std::vec![$($crate::sym!($a $($b)?)),*]).with_flags($crate::altflag!($f)).with_ambig_alt_id($o)$(.with_origin($v, $id))? };
1333 (%($v:expr, $id:expr), $($a:ident $($b:expr)?,)+)
1334 => { alt!(%($v, $id), $($a $($b)?),+) };
1335 (%($v:expr, $id:expr), $($a:ident $($b:expr)?),*)
1336 => { $crate::alt::Alternative::new(std::vec![$($crate::sym!($a $($b)?)),*]).with_flags($crate::altflag!($f)).with_origin($v, $id) };
1337 }
1338
1339 #[macro_export]
1340 macro_rules! symbols {
1341 () => { std::vec![] };
1342 ($($a:ident $($b:literal $(: $num:expr)?)?,)+) => { symbols![$($a $($b $(: $num)?)?),+] };
1343 ($($a:ident $($b:literal $(: $num:expr)?)?),*) => { std::vec![$($crate::sym!($a $($b $(: $num)?)?)),*] };
1344 }
1345
1346 #[macro_export]
1367 macro_rules! prule {
1368 () => { std::vec![] };
1369 ($($(#$f:literal,)? $($a:ident $($b:expr)?),*;)+) => { prule![$($(#$f,)? $($a $($b)?),+);+] };
1370 ($($(#$f:literal,)? $($a:ident $($b:expr)?),*);*) => { std::vec![$($crate::alt![$(#$f,)? $($a $($b)?),+]),*]};
1371 ($($(#$f:ident,)? $(%($v:expr, $id:expr),)? $($a:ident $($b:expr)?),*;)+) => { prule![$($(#$f,)? $(%($v, $id),)? $($a $($b)?),+);+] };
1372 ($($(#$f:ident,)? $(%($v:expr, $id:expr),)? $($a:ident $($b:expr)?),*);*) => { std::vec![$($crate::alt![$(#$f,)? $(%($v, $id),)? $($a $($b)?),+]),*]};
1373 ($($(#($f:expr, $o:expr),)? $(%($v:expr, $id:expr),)? $($a:ident $($b:expr)?),*;)+) => { prule![$($(#($f, $o),)? $(%($v, $id),)? $($a $($b)?),+);+] };
1375 ($($(#($f:expr, $o:expr),)? $(%($v:expr, $id:expr),)? $($a:ident $($b:expr)?),*);*) => { std::vec![$($crate::alt![$(#($f,$o),)? $(%($v, $id),)? $($a $($b)?),+]),*]};
1376 }
1377}