lexigram_lib/grammar/
prs.rs

1// Copyright (c) 2025 Redglyph (@gmail.com). All Rights Reserved.
2
3use super::*;
4use crate::{columns_to_str, AltId, SymbolTable, VarId};
5use lexigram_core::alt::{alt_to_rule_str, Alternative};
6use lexigram_core::CollectJoin;
7
8/// Stores a normalized production rule, where each alternative (e.g. `B C`) is stored in
9/// a `Vec<GrNode>` and all the alternatives are stored in a `Vec`.
10///
11/// ## Example
12/// `A -> B C | D | ε`
13///
14/// where the nonterminal indices A=0, B=1, C=2, D=3, is stored as:
15///
16/// `[[gnode!(nt 1), gnode!(nt 2)],[gnode!(nt 3)],[gnode!(e)]]`
17///
18/// (where the representation of vectors & type names has been simplified to square brackets).
19pub type ProdRule = Vec<Alternative>;
20
21pub fn prule_to_str(prule: &ProdRule, symbol_table: Option<&SymbolTable>) -> String {
22    prule.iter().map(|alt| alt.to_str(symbol_table)).join(" | ")
23}
24
25pub fn prule_to_rule_str(nt: VarId, prule: &ProdRule, symbol_table: Option<&SymbolTable>) -> String {
26    format!("{} -> {}", Symbol::NT(nt).to_str(symbol_table), prule.iter().map(|alt| alt.to_str(symbol_table)).join(" | "))
27
28}
29
30pub fn prule_to_macro(prule: &ProdRule) -> String {
31    format!("prule!({})", prule.iter().map(|alt| alt.to_macro_item()).join("; "))
32}
33
34#[derive(Clone, Copy, PartialEq, Debug)]
35enum AltType { Independant, LeftAssoc, Prefix, RightAssoc, Suffix }
36
37impl AltType {
38    fn from(nt: &Symbol, alt: &Alternative) -> Self {
39        let left = matches!(alt.first(), Some(var) if var == nt);
40        let right = alt.len() > 1 && matches!(alt.last(), Some(var) if var == nt);
41        match (left, right) {
42            (false, false) => AltType::Independant,
43            (false, true)  => AltType::Prefix,
44            (true, false)  => AltType::Suffix,
45            (true, true)   => if alt.flags & ruleflag::R_ASSOC != 0 { AltType::RightAssoc } else { AltType::LeftAssoc },
46        }
47    }
48}
49
50#[derive(Debug, Clone)]
51struct AltInfo {
52    #[allow(unused)]
53    pred_priority: Option<AltId>,
54    ivar: usize,
55    ty: AltType
56}
57
58#[derive(Debug)]
59pub struct LLParsingTable {
60    pub num_nt: usize,
61    pub num_t: usize,
62    pub alts: Vec<(VarId, Alternative)>,
63    pub table: Vec<AltId>,
64    pub flags: Vec<u32>,            // NT -> flags (+ or * normalization)
65    pub parent: Vec<Option<VarId>>, // NT -> parent NT
66}
67
68impl LLParsingTable {
69    pub fn new() -> Self {
70        LLParsingTable { num_nt: 0, num_t: 0, alts: vec![], table: vec![], flags: vec![], parent: vec![] }
71    }
72
73    pub fn get_top_parent(&self, nt: VarId) -> VarId {
74        let mut var = nt;
75        while let Some(parent) = self.parent[var as usize] {
76            var = parent;
77        }
78        var
79    }
80}
81
82#[derive(Clone, Debug)]
83pub struct ProdRuleSet<T> {
84    pub(super) prules: Vec<ProdRule>,
85    pub(crate) origin: Origin<VarId, FromPRS>,
86    pub(super) num_nt: usize,
87    pub(super) num_t: usize,
88    pub(crate) symbol_table: Option<SymbolTable>,
89    pub(super) flags: Vec<u32>,
90    pub(super) parent: Vec<Option<VarId>>,
91    pub(super) start: Option<VarId>,
92    pub(crate) name: Option<String>,
93    pub(crate) nt_conversion: HashMap<VarId, NTConversion>,
94    pub(crate) log: BufLog,
95    pub(super) _phantom: PhantomData<T>
96}
97
98impl<T> ProdRuleSet<T> {
99    /// Returns the starting production rule.
100    pub fn get_start(&self) -> Option<VarId> {
101        self.start
102    }
103
104    /// Sets the starting production rule.
105    pub fn set_start(&mut self, start: VarId) {
106        self.start = Some(start);
107    }
108
109    pub fn get_name(&self) -> Option<&String> {
110        self.name.as_ref()
111    }
112
113    pub fn set_name(&mut self, name: Option<String>) {
114        self.name = name;
115    }
116
117    /// Returns a variable ID that doesn't exist yet.
118    pub fn get_next_available_var(&self) -> VarId {
119        self.prules.len() as VarId   // we don't use self.num_nt for safety reason
120    }
121
122    /// Returns all the non-empty prules
123    pub fn get_prules_iter(&self) -> impl Iterator<Item=(VarId, &ProdRule)> {
124        self.prules.iter().index().filter_map(|(id, p)| if p.is_empty() { None } else { Some((id, p)) })
125    }
126
127    pub fn get_prules_iter_mut(&mut self) -> impl Iterator<Item=(VarId, &mut ProdRule)> {
128        self.prules.iter_mut().enumerate().filter_map(|(id, p)| if p.is_empty() { None } else { Some((id as VarId, p)) })
129    }
130
131    pub fn get_alts(&self) -> impl Iterator<Item=(VarId, &Alternative)> {
132        self.prules.iter().enumerate()
133            .flat_map(|(v, p)| p.iter().map(move |alt| (v as VarId, alt)))
134    }
135
136    pub fn set_symbol_table(&mut self, symbol_table: SymbolTable) {
137        self.symbol_table = Some(symbol_table);
138    }
139
140    pub fn get_symbol_table(&self) -> Option<&SymbolTable> {
141        self.symbol_table.as_ref()
142    }
143
144    pub fn get_num_nt(&self) -> usize {
145        self.num_nt
146    }
147
148    pub fn get_num_t(&self) -> usize {
149        self.num_t
150    }
151
152    /// Adds new flags to `flags[nt]` by or'ing them.
153    /// If necessary, extends the `flags` array first.
154    pub fn set_flags(&mut self, nt: VarId, new_flags: u32) {
155        let nt = nt as usize;
156        if nt >= self.flags.len() {
157            self.flags.resize(nt + 1, 0);
158        }
159        self.flags[nt] |= new_flags;
160    }
161
162    pub fn get_flags(&self, nt: VarId) -> u32 {
163        let nt = nt as usize;
164        if nt < self.flags.len() {
165            self.flags[nt]
166        } else {
167            0
168        }
169    }
170
171    pub(super) fn set_parent(&mut self, child: VarId, parent: VarId) {
172        let child = child as usize;
173        if child >= self.parent.len() {
174            self.parent.resize(child + 1, None);
175        }
176        self.parent[child] = Some(parent);
177    }
178
179    #[allow(unused)]
180    #[cfg(test)] // we keep it here because we might use it later in logs
181    pub(super) fn get_parent(&self, child: VarId) -> Option<VarId> {
182        let child = child as usize;
183        if child >= self.parent.len() {
184            None
185        } else {
186            self.parent[child]
187        }
188    }
189
190    fn get_top_parent(&self, nt: VarId) -> VarId {
191        let mut var = nt;
192        while let Some(parent) = self.parent[var as usize] {
193            var = parent;
194        }
195        var
196    }
197
198    /// Calculates `num_t` and `num_nt` (done right after importing rules).
199    /// - `num_t` is calculated on the basis of the higher symbol found in the production rules,
200    /// so we can drop any unused symbol that is higher and keep the table width down. We can't
201    /// compact the table by removing lower unused symbols, if any, because they are defined by
202    /// the lexer.
203    /// - `num_nt` is simply the number of production rules.
204    pub(super) fn calc_num_symbols(&mut self) {
205        self.num_nt = self.prules.len();
206        self.num_t = self.prules.iter().map(|p|
207            p.iter().map(|f|
208                f.iter().filter_map(|s|
209                    if let Symbol::T(v) = s { Some(*v + 1) } else { None }
210                ).max().unwrap_or(0)
211            ).max().unwrap_or(0)
212        ).max().unwrap_or(0) as usize;
213        self.symbol_table.as_mut().map(|st| st.downsize_num_t(self.num_t));
214        self.flags.resize(self.num_nt, 0);
215        self.parent.resize(self.num_nt, None);
216    }
217
218    /// Simplifies the productions by removing unnecessary empty symbols.
219    pub(super) fn simplify(&mut self) {
220        for p in &mut self.prules {
221            let mut has_empty = false;
222            let mut i = 0;
223            while i < p.len() {
224                let alt = p.get_mut(i).unwrap();
225                let mut j = 0;
226                while j < alt.len() {
227                    if alt[j].is_empty() && (j > 0 || j + 1 < alt.len()) {
228                        alt.v.remove(j);
229                    } else {
230                        j += 1;
231                    }
232                }
233                if alt.len() == 1 && alt[0].is_empty() {
234                    if has_empty {
235                        p.remove(i);
236                    } else {
237                        has_empty = true;
238                        i += 1;
239                    }
240                } else {
241                    i += 1;
242                }
243            }
244        }
245    }
246
247    fn check_num_nt_coherency(&mut self) {
248        if let Some(n) = self.symbol_table.as_ref().and_then(|table| Some(table.get_num_nt())) {
249            let num_nt = self.prules.len();
250            if n != num_nt {
251                self.log.add_error(format!("there are {num_nt} rules but the symbol table has {n} nonterminal symbols: dropping the table"));
252                self.symbol_table = None;
253            }
254        }
255    }
256
257    /// Removes the unused nonterminals and renumbers everything accordingly.
258    /// Note that we don't remove unused T symbols because it would create a coherency problem with the lexer.
259    ///
260    /// Returns the conversion `HashMap[old symbol => new symbol]` for nonterminals.
261    fn cleanup_symbols(&mut self, keep: &mut HashSet<Symbol>) {
262        const VERBOSE: bool = false;
263        let mut nt_content = (0..self.num_nt).map(|v| if self.parent[v].is_none() { Some(v as VarId) } else { None }).to_vec();
264        for (from, cnv) in self.nt_conversion.iter() {
265            // right now, it only contains MovedTo items from the normalization
266            let MovedTo(to) = cnv else { panic!("{cnv:?} unexpected") };
267            nt_content[*from as usize] = None;
268            nt_content[*to as usize] = Some(*from);
269        }
270        if VERBOSE {
271            println!("Removing unused nonterminals:");
272            let mut all_h = self.prules.iter().flat_map(|p| p.iter().map(|x| &x.v).flatten()).cloned().collect::<HashSet<_>>();
273            all_h.extend((0..self.num_nt).map(|i| Symbol::NT(i as VarId)));
274            let mut all = all_h.into_iter().collect::<Vec<_>>();
275            all.sort();
276            println!("  current NT symbols: {}", all.iter().filter_map(|s|
277                if let Symbol::NT(_) = s { Some(format!("{}", s.to_str(self.get_symbol_table()))) } else { None }).join(", "));
278            println!("  current  T symbols: {}", all.iter().filter_map(|s|
279                if let Symbol::T(_) = s { Some(s.to_str(self.get_symbol_table())) } else { None }).join(" "));
280            let mut used = keep.iter().collect::<Vec<_>>();
281            used.sort();
282            println!("  used NT symbols:    {}", used.iter().filter_map(|s| if let Symbol::NT(v) = s { Some((v, s)) } else { None })
283                .enumerate().map(|(new_id, (id, s))| format!("{}({id} -> {new_id})", s.to_str(self.get_symbol_table()))).join(", "));
284            println!("  used  T symbols:    {}", used.iter().filter_map(|s|
285                if let Symbol::T(_) = s { Some(s.to_str(self.get_symbol_table())) } else { None }).join(" "));
286            println!("  nt_conversion: {:?}", self.nt_conversion);
287            println!("  nt_content: {}", nt_content.iter().enumerate()
288                .filter_map(|(v, f)| if let Some(from) = f { Some(format!("{v}:{from}")) } else { None } )
289                .join(", ")
290            );
291        }
292        self.nt_conversion.clear();
293        let new_num_nt = keep.iter().filter(|s| matches!(s, Symbol::NT(_))).count() as VarId;
294        let mut new_v = new_num_nt;
295        let mut conv = HashMap::<VarId, VarId>::new();
296        for i in (0..self.num_nt).rev() {
297            let v = i as VarId;
298            let symbol = Symbol::NT(v);
299            if !keep.contains(&symbol) {
300                if VERBOSE { println!("- deleting {}", symbol.to_str(self.get_symbol_table())); }
301                if self.parent[i].is_none() {
302                    self.nt_conversion.insert(v, Removed);
303                }
304                nt_content.remove(i);
305                self.prules.remove(i);
306                self.start = self.start.map(|s| if s >= v { s - 1 } else { s });
307                self.symbol_table.as_mut().map(|t| t.remove_nonterminal(v));
308                self.flags.remove(i);
309                self.parent.remove(i);
310                if self.origin.trees.len() > i {
311                    self.origin.trees.remove(i);
312                }
313
314            } else {
315                new_v -= 1;
316                conv.insert(v, new_v);
317                if VERBOSE { println!("- {symbol:?} -> {:?}", Symbol::NT(new_v)); }
318            }
319        }
320        for p in &mut self.prules {
321            for f in p {
322                for s in &mut f.v {
323                    if let Symbol::NT(s_var) = s {
324                        if let Some(new) = conv.get(s_var) {
325                            *s = Symbol::NT(*new);
326                        }
327                    }
328                }
329                if let Some((ref mut var, _id)) = f.origin {
330                    if let Some(new_var) = conv.get(&var) {
331                        *var = *new_var;
332                    }
333                }
334            }
335        }
336        for p in &mut self.parent {
337            if let Some(parent) = p {
338                if let Some(new) = conv.get(parent) {
339                    *p = Some(*new);
340                }
341            }
342        }
343        for t in &mut self.origin.trees {
344            for mut node in t.iter_depth_simple_mut() {
345                match *node {
346                    GrNode::Symbol(Symbol::NT(ref mut v)) => {
347                        if let Some(new_v) = conv.get(&v) {
348                            *v = *new_v;
349                        }
350                    }
351                    _ => {}
352                }
353            }
354        }
355        let new_map = self.origin.map.iter()
356            .map(|(v1, (v2, id))| (*conv.get(&v1).unwrap_or(&v1), (*conv.get(&v2).unwrap_or(&v2), *id)))
357            .collect::<HashMap<_, _>>();
358        // println!("old origin map: {:?}", self.origin.map);
359        // println!("new origin map: {:?}", new_map);
360        // println!("trees:\n{}", self.origin.trees.iter().enumerate().map(|(i, t)| format!("{i}:{}", grtree_to_str(t, None, None, None))).join("\n"));
361        self.origin.map = new_map;
362        keep.retain(|s| !matches!(s, Symbol::NT(_)));
363        keep.extend((0..new_num_nt as VarId).map(|v| Symbol::NT(v)));
364        self.num_nt = new_num_nt as usize;
365        if VERBOSE {
366            println!("-> nt_content: {}", nt_content.iter().enumerate()
367                .filter_map(|(v, f)| if let Some(from) = f { Some(format!("{v}:{from}")) } else { None })
368                .join(", ")
369            );
370        }
371        for (to, f) in nt_content.into_iter().index() {
372            if let Some(from) = f {
373                if from != to {
374                    self.nt_conversion.insert(from, MovedTo(to as VarId));
375                } else {
376                    if self.nt_conversion.get(&from) == Some(&Removed) {
377                        self.nt_conversion.remove(&from);
378                    }
379                }
380            }
381        };
382        if VERBOSE { println!("-> nt_conversion: {:?}", self.nt_conversion); }
383    }
384
385    pub fn calc_first(&mut self) -> HashMap<Symbol, HashSet<Symbol>> {
386        const VERBOSE: bool = false;
387        if self.start.is_none() {
388            self.log.add_error("calc_first: start NT symbol not defined");
389        }
390        if self.num_nt == 0 {
391            self.log.add_error("calc_first: no nonterminal in grammar".to_string());
392        }
393        if !self.log.has_no_errors() {
394            return HashMap::new();
395        }
396        let mut symbols = HashSet::<Symbol>::new();
397        let mut stack = vec![Symbol::NT(self.start.unwrap())];
398        while let Some(sym) = stack.pop() {
399            if !symbols.contains(&sym) {
400                symbols.insert(sym);
401                if let Symbol::NT(v) = sym {
402                    stack.extend(self.prules[v as usize].iter().map(|x| &x.v).flatten());
403                }
404            }
405        }
406        if symbols.iter().filter(|s| matches!(s, Symbol::NT(_))).count() != self.num_nt {
407            let nt_removed = (0..self.num_nt as VarId)
408                // warnings about symbols that are not used but that have not been moved because of a */+ L-form:
409                .filter(|v| !symbols.contains(&Symbol::NT(*v)) && !matches!(self.nt_conversion.get(v), Some(MovedTo(_))))
410                .map(|v| Symbol::NT(v))
411                .to_vec();
412            if !nt_removed.is_empty() {
413                self.log.add_warning(format!("calc_first: unused nonterminals: {}",
414                                             nt_removed.into_iter().map(|s| format!("{:?} = {}", s, s.to_str(self.get_symbol_table()))).join(", ")));
415            }
416            self.cleanup_symbols(&mut symbols);
417        }
418
419        let unused_t = (0..self.num_t)
420            .filter_map(|t_id| {
421                let s = Symbol::T(t_id as VarId);
422                if !symbols.contains(&s) { Some(format!("T({t_id}) = {}", s.to_str(self.get_symbol_table()))) } else { None }
423            }).to_vec();
424        if unused_t.len() > 0 {
425            self.log.add_warning(format!("calc_first: unused terminals: {}", unused_t.join(", ")))
426        }
427
428        let mut first = symbols.into_iter().map(|sym| {
429            match &sym {
430                Symbol::T(_) | Symbol::Empty => (sym, hashset![sym]),
431                Symbol::NT(_) => (sym, HashSet::new()),
432                Symbol::End => panic!("found reserved symbol {sym:?} in production rules"),
433            }
434        }).collect::<HashMap<_, _>>();
435        let mut change = true;
436        let rules = (0..self.num_nt as VarId).filter(|var| first.contains_key(&Symbol::NT(*var))).to_vec();
437        if VERBOSE { println!("rules: {}", rules.iter().map(|v| Symbol::NT(*v).to_str(self.symbol_table.as_ref())).join(", ")); }
438        while change {
439            change = false;
440            for i in &rules {
441                let prule = &self.prules[*i as usize];
442                let symbol = Symbol::NT(*i as VarId);
443                if VERBOSE { println!("- {} -> {}", symbol.to_str(self.symbol_table.as_ref()), prule_to_str(prule, self.symbol_table.as_ref())); }
444                let num_items = first[&symbol].len();
445                for alt in prule {
446                    if VERBOSE { println!("  - {}", alt.to_str(self.symbol_table.as_ref())); }
447                    assert!(alt.len() > 0, "empty alternative for {}: {}",
448                            symbol.to_str(self.symbol_table.as_ref()), alt.to_str(self.symbol_table.as_ref()));
449                    if VERBOSE {
450                        print!("    [0] {}", alt[0].to_str(self.symbol_table.as_ref()));
451                        println!(", first = {}", first[&alt[0]].iter().map(|s| s.to_str(self.symbol_table.as_ref())).join(", "));
452                    }
453                    let new = alt.calc_alt_first(&first);
454                    let _n = first.get(&symbol).unwrap().len();
455                    first.get_mut(&symbol).unwrap().extend(new);
456                    if VERBOSE {
457                        if first.get(&symbol).unwrap().len() > _n {
458                            println!("    first[{}] -> {}", symbol.to_str(self.get_symbol_table()),
459                                     first.get(&symbol).unwrap().iter().map(|s| s.to_str(self.get_symbol_table())).join(", "));
460                        }
461                    }
462                }
463                change |= first[&symbol].len() > num_items;
464            }
465            if VERBOSE && change { println!("---------------------------- again"); }
466        }
467        if self.num_t == 0 {
468            self.log.add_error("calc_first: no terminal in grammar".to_string());
469        }
470        first
471    }
472
473    pub fn calc_follow(&self, first: &HashMap<Symbol, HashSet<Symbol>>) -> HashMap<Symbol, HashSet<Symbol>> {
474        const VERBOSE: bool = false;
475        assert!(self.start.is_some(), "start NT symbol not defined");
476        if !self.log.has_no_errors() {
477            return HashMap::new();
478        }
479        let mut follow = first.iter()
480            .filter_map(|(s, _)| if matches!(s, Symbol::NT(_)) { Some((*s, HashSet::<Symbol>::new())) } else { None })
481            .collect::<HashMap<_, _>>();
482        follow.get_mut(&Symbol::NT(self.start.unwrap())).unwrap().insert(Symbol::End);
483        let rules = (0..self.num_nt as VarId).filter(|var| follow.contains_key(&Symbol::NT(*var))).to_vec();
484        let mut change = true;
485        while change {
486            change = false;
487            for i in &rules {
488                let prule = &self.prules[*i as usize];
489                let symbol = Symbol::NT(*i as VarId);
490                if VERBOSE { println!("- {} -> {}", symbol.to_str(self.symbol_table.as_ref()), prule_to_str(prule, self.symbol_table.as_ref())); }
491                for alt in prule {
492                    if VERBOSE { println!("  - {}", alt.to_str(self.symbol_table.as_ref())); }
493                    let mut trail = follow.get(&symbol).unwrap().clone();
494                    for sym_i in alt.iter().rev() {
495                        if let Symbol::NT(_) = sym_i {
496                            let num_items = follow.get(sym_i).unwrap().len();
497                            follow.get_mut(sym_i).unwrap().extend(&trail);
498                            if VERBOSE {
499                                if follow.get(sym_i).unwrap().len() > num_items {
500                                    println!("    follow[{}] -> {}", sym_i.to_str(self.get_symbol_table()),
501                                             follow.get(sym_i).unwrap().iter().map(|s| s.to_str(self.get_symbol_table())).join(", "));
502                                }
503                            }
504                            change |= follow.get(sym_i).unwrap().len() > num_items;
505                            if first[sym_i].contains(&Symbol::Empty) {
506                                trail.extend(first[sym_i].iter().filter(|s| *s != &Symbol::Empty));
507                            } else {
508                                trail.clear();
509                                trail.extend(&first[sym_i]);
510                            }
511                        } else {
512                            trail.clear();
513                            trail.insert(*sym_i);
514                        }
515                    }
516                }
517            }
518            if VERBOSE && change { println!("---------------------------- again"); }
519        }
520        follow
521    }
522
523    /// Eliminates recursion from production rules, removes potential ambiguity, and updates the symbol table if provided.
524    /// ```eq
525    /// A -> αi A | A βj | A γk A | δl
526    /// ```
527    /// becomes
528    /// ```eq
529    /// A[p] -> A[indep] ( βj | γk E[pk] )*
530    /// ```
531    /// then
532    /// ```eq
533    /// A[p]  -> A[indep] Ab[p]
534    /// Ab[p] -> βj Ab[p] | γk A[var] Ab[p] | ε
535    /// ```
536    pub(super) fn remove_recursion(&mut self) {
537        /// Maximum number of P/I alternatives that are distributed before creating a new nonterminal to hold them.
538        /// They are never distributed in presence of a binary (L/R) because it would likely induce left factorization.
539        const MAX_DISTRIB_LEN: Option<usize> = None; // always distributing makes for smaller tables
540        const DONT_DISTRIB_IN_AMBIG: bool = true;   // E -> E * E | E + E | F  will keep F in an independent NT
541
542        const VERBOSE: bool = false;
543
544        self.log.add_note("removing left / binary recursion in grammar...");
545        self.check_num_nt_coherency();
546        if VERBOSE {
547            println!("ORIGINAL:");
548            self.print_rules(false, false);
549        }
550        let mut var_new = self.get_next_available_var() as usize;
551        // we must take prules out because of the borrow checker and other &mut borrows we need later...
552        let mut prules = take(&mut self.prules);
553        let mut ambig_alt_id = 0;
554        for var in 0..var_new {
555            let prule = prules.get_mut(var).unwrap();
556            let var = var as VarId;
557            let symbol = Symbol::NT(var);
558            let var_name = symbol.to_str(self.get_symbol_table());
559            let mut extra_prods = Vec::<ProdRule>::new();
560            if prule.iter().any(|p| !p.is_empty() && (p.first().unwrap() == &symbol)) {
561                let orig_str = format!("{var_name} -> {}", prule_to_str(prule, self.get_symbol_table()));
562                self.log.add_note(format!("- modifying: {orig_str}"));
563                if VERBOSE { println!("processing: {orig_str}"); }
564
565                let (indep, mut alts) : (Vec<_>, Vec<_>) = take(prule).into_iter()
566                    .partition(|alt| alt.first().unwrap() != &symbol && alt.last().unwrap() != &symbol);
567                alts.reverse();
568                let mut prec_eq = alts.iter_mut()
569                    .map(|f| {
570                        let p = f.flags & ruleflag::PREC_EQ != 0;
571                        f.flags &= !ruleflag::PREC_EQ;
572                        p
573                    }).to_vec();
574                prec_eq.pop();
575                prec_eq.insert(0, false);
576                if indep.is_empty() {
577                    self.log.add_error(format!("recursive rules must have at least one independent alternative: {} -> {}",
578                                               Symbol::NT(var).to_str(self.get_symbol_table()),
579                                               prule_to_str(prule, self.get_symbol_table())));
580                    prule.extend(alts);
581                    prule.extend(indep);
582                    continue;
583                }
584
585                // below, the variable indices are mostly related to the variables that will be created to transform
586                // the current rule. E[0] is the current `var`, E[1], ..., E[n-1] the children.
587
588                let mut var_i = 0;        // index of E[i] variable used in a rule (... | α[i] E[i] | ...)
589                let mut rule_var_i = 0;   // index of E[i] variable defined as a rule (E[i] -> ...)
590                let mut var_alts: Vec<Vec<AltId>> = vec![vec![]]; // pr_rule[i] = alternatives present in E[i]
591                let mut indep_alts = Vec::<AltId>::new();
592                let mut pr_info = Vec::<AltInfo>::new();  // information on each alternative: type, priority, ...
593                let mut has_ambig = false;
594
595                for (i, alt) in alts.iter().index::<AltId>() {
596                    let ty = AltType::from(&symbol, alt);
597                    has_ambig |= ty == AltType::LeftAssoc || ty == AltType::RightAssoc;
598                    var_i = match ty {
599                        AltType::Independant => panic!("there can't be an independent alternative in `alts`"),
600                        AltType::LeftAssoc => if prec_eq[i as usize] { var_i } else { var_i + 1 },
601                        AltType::Prefix
602                        | AltType::RightAssoc => if var_i > rule_var_i || prec_eq[i as usize] || var_alts[var_i].is_empty() { var_i } else { var_i + 1 },
603                        AltType::Suffix => var_i,
604                    };
605                    let alt_info = AltInfo {
606                        pred_priority: if ty == AltType::Prefix { None } else { Some(i) },
607                        ivar: var_i,
608                        ty
609                    };
610                    pr_info.push(alt_info);
611                    let top_maybe = match ty {
612                        AltType::Independant => panic!("there can't be an independent alternative in `alts`"),
613                        AltType::LeftAssoc => Some(var_i - 1),    // uses alternative in rules of < priority
614                        AltType::Prefix => None,                  // uses alternative in independent rule only
615                        AltType::RightAssoc
616                        | AltType::Suffix => Some(var_i),         // uses alternative in rules of <= priority
617                    };
618                    if let Some(top) = top_maybe {
619                        rule_var_i = top;
620                        var_alts.resize(1 + top, vec![]);
621                        (0..=top).for_each(|v| var_alts[v].push(i));
622                    } else {
623                        indep_alts.push(i);
624                    }
625                    let var_alt_str = format!(
626                        "[{i:2}] {:15}: {:10} => var_i = {var_i:2}, rule_var_i = {rule_var_i:2}, {{{}}}",
627                        alt.to_str(self.get_symbol_table()),
628                        format!("{ty:?}"), // "debug ignores width" bug https://github.com/rust-lang/rust/issues/55584
629                        var_alts.iter().enumerate().map(|(i, va)| format!("{i}: {}", va.iter().join(","))).join("  "));
630                    self.log.add_note(format!("  - {var_alt_str}"));
631                    if VERBOSE { println!("- {var_alt_str}"); }
632                };
633                assert!(var_i <= rule_var_i + 1, "var_i = {var_i}, rule_var_i = {rule_var_i}");
634
635                // (var, prime) for each rule except independent alternatives.
636                // CAUTION! Includes the independent NT if last op is left-assoc
637
638                let need_indep = indep.len() + indep_alts.len() > 1
639                    && (MAX_DISTRIB_LEN.map(|max| indep.len() + indep_alts.len() > max).unwrap_or(false) || has_ambig || rule_var_i < var_i)
640                    || DONT_DISTRIB_IN_AMBIG && has_ambig;
641                let num_indep = if need_indep { 1 } else { 0 };
642                let mut var_i_nt = Vec::<(VarId, VarId)>::with_capacity(var_i + 1);
643                var_i_nt.push((var, var_new as VarId));
644                var_i_nt.extend((0..var_i).map(|i| ((var_new + i*2 + 1) as VarId, (var_new + i*2 + 2) as VarId)));
645                var_new += rule_var_i * 2 + 1 + num_indep;
646                if VERBOSE { println!("adding {} variables (w/o independent alternatives), need_indep = {need_indep}, var_new: {} -> {var_new}",
647                                      rule_var_i * 2 + 1 + num_indep, var_new - (rule_var_i * 2 + 1 + num_indep)); }
648                let nt_indep_maybe = if need_indep { Some(var_new as VarId - 1) } else { None };
649                if var_new > VarId::MAX as usize {
650                    self.log.add_error(format!("too many nonterminals when expanding {var_name}: {var_new} > {}", VarId::MAX));
651                    return;
652                }
653
654                // prepares the operation alternative parts, which will be assembled in each rule according to their priority
655                // (the Ab[p] loop nonterminals will be added later since they're specific to each rule)
656                let new_alts = alts.iter().zip(&pr_info).map(|(a, AltInfo { ivar, ty, .. })| {
657                    let mut new_a: Alternative;
658                    match ty {
659                        AltType::LeftAssoc | AltType::RightAssoc => {
660                            new_a = Alternative::new(a.v[1..a.len() - 1].to_owned());
661                            if need_indep || *ivar <= rule_var_i {
662                                new_a.v.push(Symbol::NT(var_i_nt[*ivar].0));
663                            } else {
664                                new_a.v.extend(&indep[0].v);
665                            }
666                            if ty == &AltType::RightAssoc {
667                                new_a.flags = ruleflag::R_ASSOC;
668                            }
669                        }
670                        AltType::Suffix => {
671                            new_a = Alternative::new(a.v[1..a.len()].to_owned());
672                        }
673                        AltType::Prefix => {
674                            new_a = Alternative::new(a.v[..a.len() - 1].to_owned());
675                            new_a.v.push(Symbol::NT(var_i_nt[*ivar].0));
676                        }
677                        AltType::Independant => panic!("there can't be an independent alternative in `alts`"),
678                    }
679                    new_a.flags |= a.flags & ruleflag::ALTERNATIVE_INFO;
680                    new_a.origin = a.origin;
681                    new_a.ambig_alt_id = Some(ambig_alt_id);
682                    ambig_alt_id += 1;
683                    new_a
684                }).to_vec();
685                let mut used_sym = HashSet::<Symbol>::new();
686                let mut prod_indep = new_alts.iter()
687                    .zip(&pr_info)
688                    .filter_map(|(nf, AltInfo { ty, .. })| if ty == &AltType::Prefix { Some(nf.clone()) } else { None })
689                    .to_vec();
690                // when the lowest priority alternative is P or R, all the alternatives
691                // of the first variable rule will sprout an ambiguity:
692                let greedy_prologue = pr_info[0].ty == AltType::Prefix && need_indep || pr_info[0].ty == AltType::RightAssoc;
693                for (i, va) in var_alts.into_iter().enumerate() {
694                    let (nt, nt_loop) = var_i_nt[i];
695                    let prod_nt = if let Some(nt_indep) = nt_indep_maybe {
696                        prule!(nt nt_indep, nt nt_loop)
697                    } else {
698                        // distributes the independent alternatives (only works if there are no L/R types in the original rule)
699                        prod_indep.iter().cloned()
700                            .chain(indep.iter().map(|a| {
701                            let mut new_a = a.clone();
702                            new_a.push(sym!(nt nt_loop));
703                            new_a
704                        })).collect()
705                    };
706                    let mut new_used_sym = Vec::<Symbol>::new();
707                    let mut prod_nt_loop = va.iter().enumerate().rev().map(|(_, &a_id)| {
708                        let mut f = new_alts[a_id as usize].clone();
709                        f.v.push(Symbol::NT(nt_loop));
710                        let sym = f.first().unwrap();
711                        let is_used_sym = used_sym.contains(sym);
712                        if !is_used_sym {
713                            new_used_sym.push(*sym);
714                        }
715                        if is_used_sym || (i == 0 && greedy_prologue) {
716                            f.flags |= ruleflag::GREEDY;
717                        }
718                        if !has_ambig && pr_info[a_id as usize].ty == AltType::Suffix {
719                            self.set_flags(nt, ruleflag::PARENT_L_RECURSION);
720                            self.set_flags(nt_loop, ruleflag::CHILD_L_RECURSION);
721                        }
722                        f
723                    }).to_vec();
724                    used_sym.extend(new_used_sym);
725                    prod_nt_loop.push(alt!(e));
726                    if i == 0 {
727                        *prule = prod_nt;
728                        extra_prods.push(prod_nt_loop);
729                        self.symbol_table.as_mut().map(|t| {
730                            assert_eq!(t.add_child_nonterminal(var), var_i_nt[0].1);
731                        });
732                    } else {
733                        extra_prods.extend([prod_nt, prod_nt_loop]);
734                        self.symbol_table.as_mut().map(|t| {
735                            assert_eq!(t.add_child_nonterminal(var), var_i_nt[i].0);
736                            assert_eq!(t.add_child_nonterminal(var), var_i_nt[i].1);
737                        });
738                    }
739                    if has_ambig {
740                        self.set_flags(nt, ruleflag::PARENT_L_RECURSION);
741                        self.set_flags(nt_loop, ruleflag::CHILD_L_RECURSION);
742                    }
743                }
744                if need_indep {
745                    self.symbol_table.as_mut().map(|t| {
746                        assert_eq!(t.add_child_nonterminal(var), nt_indep_maybe.unwrap());
747                    });
748                }
749                if VERBOSE {
750                    println!("new alternatives: {}", new_alts.iter().enumerate()
751                        .filter_map(|(i, na)| if na.is_empty() { None } else { Some(format!("[{i}] {}", na.to_str(self.get_symbol_table()))) }).join(", "));
752                    println!("vars: {}{}", var_i_nt.iter().take(rule_var_i + 1).enumerate().map(|(i, (v1, v2))|
753                        format!("[{i}]:{},{}", Symbol::NT(*v1).to_str(self.get_symbol_table()),
754                                Symbol::NT(*v2).to_str(self.get_symbol_table()))).join("  "),
755                                if need_indep { format!("  [{}]:{}", var_i, Symbol::NT(var_i_nt[var_i].0).to_str(self.get_symbol_table() )) } else { String::new() });
756                }
757                if has_ambig {
758                    self.set_flags(var, ruleflag::PARENT_AMBIGUITY);
759                } else if !prod_indep.is_empty() && !need_indep {
760                    self.set_flags(var, ruleflag::R_RECURSION);
761                }
762                for (nt, nt_prime) in var_i_nt.into_iter().take(rule_var_i + 1) {
763                    if nt != var {
764                        self.set_parent(nt, var);
765                    }
766                    self.set_parent(nt_prime, nt);
767                }
768                if let Some(nt_indep) = nt_indep_maybe {
769                    if !has_ambig && prod_indep.iter().any(|p| p.last() == Some(&Symbol::NT(nt_indep)))
770                        || has_ambig && !prod_indep.is_empty()
771                    {
772                        self.set_flags(nt_indep, ruleflag::R_RECURSION);
773                    }
774                    prod_indep.extend(indep);
775                    self.set_parent(nt_indep, var);
776                    extra_prods.push(prod_indep);
777                }
778                self.parent.resize(var_new, None);
779                self.flags.resize(var_new, 0);
780                self.log.add_note(format!("  => {var_name} -> {}", prule_to_str(prule, self.get_symbol_table())));
781                for (v, p) in extra_prods.iter().index_start(prules.len() as VarId) {
782                    self.log.add_note(
783                        format!("     {} -> {}", Symbol::NT(v).to_str(self.get_symbol_table()), prule_to_str(p, self.get_symbol_table())));
784                }
785            } else if prule.iter().any(|p| !p.is_empty() && p.last().unwrap() == &symbol) {
786                if self.get_flags(var) & ruleflag::CHILD_REPEAT == 0 {
787                    self.set_flags(var, ruleflag::R_RECURSION);
788                }
789            }
790            prules.extend(extra_prods);
791            let incompatibility_flags = ruleflag::R_RECURSION | ruleflag::L_FORM | ruleflag::PARENT_L_RECURSION;
792            let incompatibility_str = "<L> right recursivity + left recursivity";
793            let flags = self.get_flags(var);
794            if flags & incompatibility_flags == incompatibility_flags {
795                let tree = &self.origin.trees[var as usize];
796                self.log.add_error(format!(
797                    "`{var_name} -> {}` has incompatible rule alternatives: {incompatibility_str}",
798                    grtree_to_str(tree, None, None, None, self.get_symbol_table(), false)));
799            }
800        }
801        if VERBOSE {
802            println!("#prules: {}, #flags: {}, #parents:{}", prules.len(), self.flags.len(), self.parent.len());
803            if let Some(ref mut table) = self.symbol_table {
804                println!("table: {} ({})", table.get_nonterminals().join(", "), table.get_num_nt());
805            }
806        }
807        self.prules = prules;
808        self.num_nt = self.prules.len();
809        if VERBOSE {
810            println!("FINAL:");
811            self.print_rules(false, false);
812            println!("FLAGS:\n{}", self.flags.iter().index::<VarId>()
813                .map(|(v, f)| format!("- ({v:2}) {}: {}", Symbol::NT(v).to_str(self.get_symbol_table()), ruleflag::to_string(*f).join(", "))).join("\n"));
814            if let Some(table) = &self.symbol_table {
815                println!("Symbol table:\n-NT: {}\n-T: {}",
816                         table.get_nonterminals().enumerate().map(|(i, nt)| format!("{i}:{nt}")).join(", "),
817                         table.get_terminals().enumerate()
818                             .map(|(i, (t, txt))| format!("{i}:{t}{}", if let Some(st) = txt { format!(" ({st})") } else { String::new() })).join(", "))
819            }
820        }
821    }
822
823    /// Factorizes all the left symbols that are common to several alts by rejecting the non-common part
824    /// to a new non-terminal. Updates the symbol table if provided.
825    ///
826    /// After the factorization, every child has an NT index greater than its parent, even if the
827    /// factorization is performed several times on the same rule.
828    ///
829    /// Algorithm:
830    /// - for each NT
831    ///     - sort alts
832    ///     - repeat as long as there are common starting symbols:
833    ///         - take first group of >1 alts starting with the same symbol `α[0]`
834    ///         - extract the number of starting symbols common to all alts of the group (1 or more): `α`
835    ///         - create a new NT with the group, where `α` has been removed at the beginning
836    pub fn left_factorize(&mut self) {
837        fn similarity(a: &Alternative, b: &Alternative) -> usize {
838            a.iter().zip(b.iter()).take_while(|(a, b)| a == b).count()
839        }
840
841        const VERBOSE: bool = false;
842        self.log.add_note("removing left factorization...");
843        let mut new_var = self.get_next_available_var();
844        // we must take prules out because of the borrow checker and other &mut borrows we need later...
845        let mut prules = take(&mut self.prules);
846        let mut i = 0;
847        while i < prules.len() {
848            let prule = &mut prules[i];
849            let var = i as VarId;
850            i += 1;
851            if prule.len() < 2 {
852                continue
853            }
854            let mut alts = prule.clone();
855            let mut extra = Vec::<ProdRule>::new();
856            let mut changed = false;
857            alts.sort();
858            if VERBOSE { println!("{var}: {} -> {}", Symbol::NT(var).to_str(self.get_symbol_table()), alts.iter().map(|f| f.to_str(self.get_symbol_table())).join(" | ")); }
859            while alts.len() > 1 {
860                let simi = alts.windows(2).enumerate()
861                    .map(|(j, x)| (j, similarity(&x[0], &x[1])))
862                    .skip_while(|(_, s)| *s == 0)
863                    .take_while(|(_, s)| *s != 0)
864                    .to_vec();
865                if simi.is_empty() {
866                    if VERBOSE { println!(" nothing to factorize"); }
867                    break;
868                }
869                changed = true;
870                let min = simi.iter().map(|(_, s)| *s).min().unwrap();
871                let start = simi[0].0;
872                let stop = start + simi.len();
873                let mut factorized = Alternative::new(alts[start].v.iter().take(min).cloned().to_vec());
874                let mut child = alts.drain(start..=stop).to_vec();
875                if VERBOSE { println!("child:\n{}", child.iter().enumerate().map(|(i, c)| format!("- [{i}] = {}  org = {:?}", c.to_str(self.get_symbol_table()), c.origin)).join("\n")); }
876                if child.iter().all(|f| f.is_greedy()) {
877                    factorized.flags |= ruleflag::GREEDY;
878                }
879                for f in &mut child {
880                    f.v.drain(0..min);
881                }
882                if child[0].v.is_empty() {
883                    if self.flags[var as usize] & ruleflag::CHILD_REPEAT != 0 {
884                        factorized.origin = child[0].origin;
885                    }
886                    child[0].v.push(Symbol::Empty);
887                    let empty = child.remove(0);
888                    child.push(empty);
889                }
890                let var_prime = new_var;
891                new_var += 1;
892                self.set_flags(var, ruleflag::PARENT_L_FACTOR);
893                self.set_flags(var_prime, ruleflag::CHILD_L_FACT);
894                let top = self.get_top_parent(var);
895                self.symbol_table.as_mut().map(|table| assert_eq!(table.add_child_nonterminal(top), var_prime));
896                self.set_parent(var_prime, var);
897                let symbol_prime = Symbol::NT(var_prime);
898                factorized.v.push(symbol_prime);
899                alts.insert(start, factorized);
900                if VERBOSE {
901                    println!(" - similarity: {} => {}", simi.iter().map(|(j, s)| format!("{j}:{s}")).join(", "), min);
902                    println!("   factorize: {}", child.iter().map(|a| a.to_str(self.get_symbol_table())).join(" | "));
903                    println!("   left:      {}", alts.iter().map(|a| a.to_str(self.get_symbol_table())).join(" | "));
904                }
905                extra.push(child);
906            }
907            if changed {
908                self.log.add_note(format!(
909                    "- modifying: {} -> {}",
910                    Symbol::NT(var).to_str(self.get_symbol_table()), prule_to_str(prule, self.get_symbol_table())));
911                self.log.add_note(format!(
912                    "  => {} -> {}",
913                    Symbol::NT(var).to_str(self.get_symbol_table()), prule_to_str(&alts, self.get_symbol_table())));
914                *prule = alts;
915                let offset = prules.len() as VarId;
916                for (v, p) in extra.iter().index_start(offset) {
917                    self.log.add_note(format!(
918                        "     {} -> {}",
919                        Symbol::NT(v).to_str(self.get_symbol_table()), prule_to_str(p, self.get_symbol_table())));
920                }
921                prules.extend(extra);
922            }
923        }
924        self.prules = prules;
925        self.num_nt = self.prules.len();
926    }
927
928    pub(crate) fn remove_ambiguity(&self) {
929        todo!()
930    }
931
932    /// Moves the flags of the mask from the alternatives to the NT flags
933    fn transfer_alt_flags(&mut self) {
934        // add other flags here if necessary:
935        const FLAG_MASK: u32 = ruleflag::L_FORM;
936
937        for (v, prule) in self.prules.iter_mut().enumerate() {
938            let flags = prule.iter().fold(0, |acc, alt| acc | alt.flags) & FLAG_MASK;
939            self.flags[v] |= flags;
940            for alt in prule.iter_mut() {
941                alt.flags &= !FLAG_MASK;
942            }
943        }
944    }
945
946    fn check_flags(&mut self) {
947        const FLAG_CHECK_MASK: u32 = ruleflag::L_FORM | ruleflag::CHILD_REPEAT | ruleflag::R_RECURSION;
948        for v in 0..self.num_nt {
949            if self.flags[v] & FLAG_CHECK_MASK == ruleflag::L_FORM {
950                // it's also fine to have L-form on a l-factor children of a right-recursive parent
951                if self.flags[v] & ruleflag::CHILD_L_FACT == 0 || self.flags[self.parent[v].unwrap() as usize] & ruleflag::R_RECURSION == 0 {
952                    self.log.add_error(format!("{} has an illegal flag L-Form (only used with +, *, or right recursion): {}",
953                                               Symbol::NT(v as VarId).to_str(self.get_symbol_table()),
954                                               ruleflag::to_string(self.flags[v]).join(" ")
955                    ));
956                }
957            }
958        }
959    }
960
961    /// Generates a side-by-side comparison between the production rules and the original rule
962    /// each alternative represents (or which part of that rule).
963    ///
964    /// The output can be formatted with [`indent_source`].
965    pub fn prs_alt_origins_str(&self, ansi: bool) -> Vec<String> {
966        let mut cols = vec![vec!["ProdRuleSet".to_string(), "|".to_string(), "Original rules".to_string()]];
967        cols.push(vec!["-----------".to_string(), "|".to_string(), "--------------".to_string()]);
968        // adds the current alternatives
969        cols.extend(self.get_prules_iter()
970            .flat_map(|(v, prule)| prule.iter()
971                .map(move |alt| vec![
972                    alt_to_rule_str(v, &alt.v, self.get_symbol_table()),
973                    "|".to_string(),
974                    if let Some((vo, ido)) = alt.origin {
975                        let tree = &self.origin.trees[vo as usize];
976                        let emphasis = if tree.get_root() == Some(ido) { None } else { Some(ido) };
977                        let orig_rule = grtree_to_str_custom(tree, None, emphasis, Some(vo), self.get_symbol_table(), false, ansi);
978                        format!("{} -> {orig_rule}", Symbol::NT(vo).to_str(self.get_symbol_table()))
979                    } else {
980                        String::new()
981                    }
982                ])
983            ));
984        // adds child nonterminals that represent a part of the original nonterminals
985        cols.extend((0..self.num_nt)
986            .filter_map(|var|
987                self.parent[var]
988                    .and_then(|_| self.origin.map.get(&(var as VarId))
989                        .and_then(|&(v, index)| Some((var as VarId, v, index)))))
990            .map(|(var, v, index)| vec![
991                Symbol::NT(var).to_str(self.get_symbol_table()),
992                "|".to_string(),
993                format!("{} -> {}",
994                        Symbol::NT(v).to_str(self.get_symbol_table()),
995                        grtree_to_str_custom(&self.origin.trees[v as usize], None, Some(index), None, self.get_symbol_table(), false, ansi))
996
997            ]));
998        columns_to_str(cols, None)
999    }
1000}
1001
1002impl<T> LogReader for ProdRuleSet<T> {
1003    type Item = BufLog;
1004
1005    fn get_log(&self) -> &Self::Item {
1006        &self.log
1007    }
1008
1009    fn give_log(self) -> Self::Item {
1010        self.log
1011    }
1012}
1013
1014impl<T> HasBuildErrorSource for ProdRuleSet<T> {
1015    const SOURCE: BuildErrorSource = BuildErrorSource::ProdRuleSet;
1016}
1017
1018impl ProdRuleSet<General> {
1019    fn with_capacity(capacity: usize) -> Self {
1020        Self {
1021            prules: Vec::with_capacity(capacity),
1022            origin: Origin::new(),
1023            num_nt: 0,
1024            num_t: 0,
1025            symbol_table: None,
1026            flags: Vec::with_capacity(capacity),
1027            parent: Vec::with_capacity(capacity),
1028            start: None,
1029            name: None,
1030            nt_conversion: HashMap::new(),
1031            log: BufLog::new(),
1032            _phantom: PhantomData
1033        }
1034    }
1035}
1036
1037impl ProdRuleSet<LL1> {
1038    /// Creates the table for predictive top-down parsing.
1039    ///
1040    /// Returns:
1041    /// - `num_nt` = number of nonterminals
1042    /// - `num_t` = number of terminals (including the end symbol)
1043    /// - `alts`, the production alternatives: (VarId, Alternative) where the first value is the non-terminal index and the second one of its alts
1044    /// - the table of `num_nt * num_t` values, where `table[nt_index * num_nt + t_index]` gives the index of the production alternative for
1045    /// the non-terminal index `nt_index` and the terminal index `t_index`. A value >= `alts.len()` stands for a syntax error.
1046    pub(super) fn calc_table(&mut self, first: &HashMap<Symbol, HashSet<Symbol>>, follow: &HashMap<Symbol, HashSet<Symbol>>, error_recovery: bool) -> LLParsingTable {
1047        fn add_table(table: &mut Vec<Vec<AltId>>, num_t: usize, nt_id: VarId, t_id: VarId, a_id: AltId) {
1048            let pos = nt_id as usize * num_t + t_id as usize;
1049            table[pos].push(a_id);
1050        }
1051        const VERBOSE: bool = false;
1052        const DISABLE_FILTER: bool = false;
1053        if !self.log.has_no_errors() {
1054            return LLParsingTable::new();
1055        }
1056        let mut alts = self.prules.iter().index().filter(|(v, _)| DISABLE_FILTER || first.contains_key(&Symbol::NT(*v)))
1057            .flat_map(|(v, x)| x.iter().map(move |a| (v, a.clone()))).to_vec();
1058        let error_skip = alts.len() as AltId;   // table entry for syntactic error; recovery by skipping input symbol
1059        let error_pop = error_skip + 1;         // table entry for syntactic error; recovery by popping T or NT from stack
1060        let num_nt = self.num_nt;
1061        let num_t = self.num_t + 1;
1062        let end = (num_t - 1) as VarId; // index of end symbol
1063        let mut used_t = HashSet::<Symbol>::new();
1064        let mut table: Vec<Vec<AltId>> = vec![vec![]; num_nt * num_t];
1065        for (a_id, (nt_id, alt)) in alts.iter().index() {
1066            used_t.extend(alt.iter().filter(|s| s.is_t()));
1067            if VERBOSE { println!("- {a_id}: {} -> {}  => {}", Symbol::NT(*nt_id).to_str(self.get_symbol_table()),
1068                                  alt.to_str(self.get_symbol_table()),
1069                                  alt.calc_alt_first(first).iter().map(|s| s.to_str(self.get_symbol_table())).join(" ")); }
1070            let mut has_end = false;
1071            let mut has_empty = false;
1072            for s in alt.calc_alt_first(first) {
1073                match s {
1074                    Symbol::Empty => {
1075                        has_empty = true;
1076                        for s in &follow[&Symbol::NT(*nt_id)] {
1077                            match s {
1078                                Symbol::T(t_id) => add_table(&mut table, num_t, *nt_id, *t_id, a_id),
1079                                Symbol::End     => add_table(&mut table, num_t, *nt_id, end, a_id),
1080                                _ => {}
1081                            }
1082                        }
1083                    }
1084                    Symbol::T(t_id) => {
1085                        add_table(&mut table, num_t, *nt_id, t_id, a_id);
1086                    }
1087                    Symbol::NT(_) => {}
1088                    Symbol::End => {
1089                        has_end = true;
1090                    }
1091                }
1092            }
1093            if has_empty && has_end {
1094                add_table(&mut table, num_t, *nt_id, end, end);
1095            }
1096        }
1097        // creates the table and removes ambiguities
1098        let mut final_table = Vec::<AltId>::new();
1099        for nt_id in 0..num_nt {
1100            for t_id in 0..num_t {
1101                let pos = nt_id * num_t + t_id;
1102                final_table.push(match table[pos].len() {
1103                    0 => {
1104                        if error_recovery {
1105                            let sym_t = if t_id < num_t - 1 { Symbol::T(t_id as TokenId) } else { Symbol::End };
1106                            let sym_nt = Symbol::NT(nt_id as VarId);
1107                            if follow[&sym_nt].contains(&sym_t) || first[&sym_nt].contains(&sym_t) {
1108                                error_pop
1109                            } else {
1110                                error_skip
1111                            }
1112                        } else {
1113                            error_skip
1114                        }
1115                    },
1116                    1 => *table[pos].first().unwrap(),
1117                    _ => {
1118                        // we take the first item which isn't already in another position on the same NT row
1119                        let greedies = table[pos].iter().filter(|&a_id| alts[*a_id as usize].1.is_greedy()).cloned().to_vec();
1120                        if greedies.len() == 1 {
1121                            let chosen = greedies[0];
1122                            self.log.add_note(
1123                                format!("- calc_table: expected ambiguity for NT '{}', T '{}': {} => <{}> is specified as greedy and has been chosen",
1124                                        Symbol::NT(nt_id as VarId).to_str(self.get_symbol_table()),
1125                                        if t_id < self.num_t { Symbol::T(t_id as VarId).to_str(self.get_symbol_table()) } else { "<EOF>".to_string() },
1126                                        table[pos].iter().map(|a_id|
1127                                            format!("<{}>", alts[*a_id as usize].1.to_str(self.get_symbol_table()))).join(" or "),
1128                                        alts[chosen as usize].1.to_str(self.get_symbol_table())
1129                                ));
1130                            table[pos] = greedies;
1131                            chosen
1132                        } else {
1133                            let row = (0..num_t).filter(|j| *j != t_id).flat_map(|j| &table[nt_id * num_t + j]).collect::<HashSet<_>>();
1134                            let chosen = *table[pos].iter().find(|a| !row.contains(a)).unwrap_or(&table[pos][0]);
1135                            self.log.add_warning(
1136                                format!("- calc_table: ambiguity for NT '{}', T '{}': {} => <{}> has been chosen",
1137                                        Symbol::NT(nt_id as VarId).to_str(self.get_symbol_table()),
1138                                        if t_id < self.num_t { Symbol::T(t_id as VarId).to_str(self.get_symbol_table()) } else { "<EOF>".to_string() },
1139                                        table[pos].iter().map(|a_id|
1140                                            format!("<{}>", alts[*a_id as usize].1.to_str(self.get_symbol_table()))).join(" or "),
1141                                        alts[chosen as usize].1.to_str(self.get_symbol_table())
1142                                ));
1143                            table[pos] = vec![chosen];
1144                            chosen
1145                        }
1146                    }
1147                });
1148            }
1149        }
1150        if !(0..num_t - 1).any(|t_id| (0..num_nt).any(|nt_id| final_table[nt_id * num_t + t_id] < error_skip)) {
1151            self.log.add_error("- calc_table: no terminal used in the table".to_string());
1152        }
1153        for (_, a) in &mut alts {
1154            a.flags &= !ruleflag::GREEDY;
1155        }
1156        LLParsingTable { num_nt, num_t, alts, table: final_table, flags: self.flags.clone(), parent: self.parent.clone() }
1157    }
1158
1159    pub fn make_parsing_table(&mut self, error_recovery: bool) -> LLParsingTable {
1160        self.log.add_note("calculating parsing table...");
1161        let first = self.calc_first();
1162        let follow = self.calc_follow(&first);
1163        self.calc_table(&first, &follow, error_recovery)
1164    }
1165
1166    pub fn gen_tables_source_code(&self, indent: usize) -> String {
1167        let st = self.symbol_table.as_ref().unwrap();
1168        let mut source = Vec::<String>::new();
1169        // "origin" preparation
1170        source.push(format!("static ORIGIN: [(Option<usize>, &[(GrNode, &[usize])]); {}] = [", self.origin.trees.len()));
1171        for t in &self.origin.trees {
1172            let tree_str = (0..t.len()).into_iter()
1173                .map(|i| format!("({}, &[{}])", t.get(i).gen_source_code(), t.children(i).into_iter().join(",")))
1174                .join(", ");
1175            source.push(format!("    ({:?}, &[{}]),", t.get_root(), tree_str));
1176        }
1177        source.push("];".to_string());
1178        source.push(format!("static MAP: [(VarId, (VarId, usize)); {}] = [", self.origin.map.len()));
1179        let mut sorted_map = self.origin.map.iter().to_vec();
1180        sorted_map.sort(); // we must sort it so that its output is reproducible
1181        source.extend(sorted_map.chunks(5)
1182            .map(|chk| format!("    {},", chk.into_iter().map(|(a, (c, d))| format!("({a}, ({c}, {d}))")).join(", "))));
1183        source.push("];".to_string());
1184        source.push("let origin = Origin::from_data(".to_string());
1185        source.push("    ORIGIN.into_iter().map(|(root, nodes)| GrTree::from((root, nodes.to_vec()))).collect(),".to_string());
1186        source.push("    HashMap::from(MAP));".to_string());
1187        // ProdRuleSetTables:
1188        source.push(String::new());
1189        source.push("let ll1_tables = ProdRuleSetTables::new(".to_string());
1190        source.push(format!("    {:?},", self.name));
1191        source.push("    vec![".to_string());
1192        source.extend(self.prules.iter().map(|prule| format!("        {},", prule_to_macro(prule))));
1193        source.push("    ],".to_string());
1194        source.push("    origin,".to_string());
1195        source.push(format!("    vec![{}],", st.get_terminals().map(|x| format!("{x:?}")).join(", ")));
1196        source.push(format!("    vec![{}],", st.get_nonterminals().map(|x| format!("{x:?}")).join(", ")));
1197        source.push(format!("    vec![{}],", self.flags.iter().join(", ")));
1198        source.push(format!("    vec![{}],", self.parent.iter().map(|p_maybe| format!("{p_maybe:?}")).join(", ")));
1199        source.push(format!("    {:?},", self.start));
1200        source.push(format!("    hashmap![{}]", self.nt_conversion.iter().map(|(v, conv)| format!("{v} => {conv:?}")).join(", ")));
1201        source.push(");".to_string());
1202        indent_source(vec![source], indent)
1203    }
1204}
1205
1206// ---------------------------------------------------------------------------------------------
1207
1208pub struct ProdRuleSetTables {
1209    name: Option<String>,
1210    prules: Vec<ProdRule>,
1211    origin: Origin<VarId, FromPRS>,
1212    t: Vec<(String, Option<String>)>,   // terminal identifiers and optional representation
1213    nt: Vec<String>,                    // nt to nonterminal identifier
1214    flags: Vec<u32>,
1215    parent: Vec<Option<VarId>>,
1216    start: Option<VarId>,
1217    nt_conversion: HashMap<VarId, NTConversion>,
1218}
1219
1220impl ProdRuleSetTables {
1221    pub fn new<T: Into<String>>(
1222        name: Option<T>,
1223        prules: Vec<ProdRule>,
1224        origin: Origin<VarId, FromPRS>,
1225        t: Vec<(T, Option<T>)>,
1226        nt: Vec<T>,
1227        flags: Vec<u32>,
1228        parent: Vec<Option<VarId>>,
1229        start: Option<VarId>,
1230        nt_conversion: HashMap<VarId, NTConversion>,
1231    ) -> Self {
1232        let t = t.into_iter().map(|(t, t_maybe)| (t.into(), t_maybe.map(|t| t.into()))).collect();
1233        let nt = nt.into_iter().map(|nt| nt.into()).collect();
1234        ProdRuleSetTables {
1235            name: name.map(|s| s.into()),
1236            prules,
1237            origin, t, nt, flags, parent, start, nt_conversion,
1238        }
1239    }
1240
1241    pub fn get_name(&self) -> Option<&String> {
1242        self.name.as_ref()
1243    }
1244}
1245
1246impl BuildFrom<ProdRuleSetTables> for ProdRuleSet<LL1> {
1247    fn build_from(source: ProdRuleSetTables) -> Self {
1248        let mut symbol_table = SymbolTable::new();
1249        symbol_table.extend_terminals(source.t);
1250        symbol_table.extend_nonterminals(source.nt);
1251        ProdRuleSet {
1252            prules: source.prules,
1253            origin: source.origin,
1254            num_nt: symbol_table.get_num_nt(),
1255            num_t: symbol_table.get_num_t(),
1256            symbol_table: Some(symbol_table),
1257            flags: source.flags,
1258            parent: source.parent,
1259            start: source.start,
1260            name: source.name,
1261            nt_conversion: source.nt_conversion,
1262            log: BufLog::new(),
1263            _phantom: PhantomData,
1264        }
1265    }
1266}
1267
1268// ---------------------------------------------------------------------------------------------
1269
1270impl BuildFrom<RuleTreeSet<Normalized>> for ProdRuleSet<General> {
1271    /// Builds a [`ProdRuleSet<General>`] from a [`RuleTreeSet<Normalized>`].
1272    ///
1273    /// If an error is encountered or was already encountered before, an empty shell object
1274    /// is built with the log detailing the error(s).
1275    fn build_from(mut rules: RuleTreeSet<Normalized>) -> Self {
1276        fn children_to_vec(tree: &GrTree, parent_id: usize) -> Alternative {
1277            let mut flags: u32 = 0;
1278            let alt = tree.children(parent_id).iter()
1279                .map(|id| tree.get(*id))
1280                .filter(|node| {
1281                    match **node {
1282                        GrNode::RAssoc => {
1283                            flags |= ruleflag::R_ASSOC;
1284                            false
1285                        }
1286                        GrNode::LForm(_) => {
1287                            flags |= ruleflag::L_FORM;
1288                            false
1289                        }
1290                        GrNode::PrecEq => {
1291                            flags |= ruleflag::PREC_EQ;
1292                            false
1293                        }
1294                        _ => true,
1295                    }
1296                })
1297                .map(|node| {
1298                match node {
1299                    GrNode::Symbol(s) => s.clone(),
1300                    x => panic!("unexpected symbol {x} under &")
1301                }
1302            }).to_vec();
1303            Alternative::new(alt).with_flags(flags)
1304        }
1305        let mut prules = Self::with_capacity(rules.trees.len());
1306        prules.start = rules.start;
1307        prules.symbol_table = rules.symbol_table;
1308        prules.flags = rules.flags;
1309        prules.parent = rules.parent;
1310        prules.nt_conversion = rules.nt_conversion;
1311        prules.log = rules.log;
1312        if !prules.log.has_no_errors() {
1313            // We handle the errors by transmitting the log to the next construct rather than returning a `Result` type.
1314            // This allows to cascade the transforms without getting a complicated error resolving system while preserving
1315            // the information about the errors easily.
1316            return prules;
1317        }
1318        prules.origin = Origin::<VarId, FromPRS>::from_trees_mut(&mut rules.origin.trees);
1319        for (var, tree) in rules.trees.iter().index() {
1320            if !tree.is_empty() {
1321                let root = tree.get_root().expect("tree {var} has no root");
1322                let root_sym = tree.get(root);
1323                let mut prule = match root_sym {
1324                    GrNode::Symbol(s) => {
1325                        let mut alt = Alternative::new(vec![s.clone()]);
1326                        if let Some(&(v, ch)) = rules.origin.map.get(&(var, root)) {
1327                            alt.origin = Some((v, ch));
1328                        }
1329                        vec![alt]
1330                    },
1331                    GrNode::Concat => {
1332                        let mut alt = children_to_vec(tree, root);
1333                        if let Some(&(v, ch)) = rules.origin.map.get(&(var, root)) {
1334                            alt.origin = Some((v, ch));
1335                        }
1336                        vec![alt]
1337                    },
1338                    GrNode::Or => tree.children(root).iter()
1339                        .map(|id| {
1340                            let child = tree.get(*id);
1341                            let mut alt = if let GrNode::Symbol(s) = child {
1342                                Alternative::new(vec![s.clone()])
1343                            } else {
1344                                assert_eq!(*child, GrNode::Concat, "unexpected symbol {child} under |");
1345                                children_to_vec(tree, *id)
1346                            };
1347                            if let Some(&(v, ch)) = rules.origin.map.get(&(var, *id)) {
1348                                alt.origin = Some((v, ch));
1349                            }
1350                            alt
1351                        }).to_vec(),
1352                    s => panic!("unexpected symbol {s} as root of normalized GrTree for NT {}", Symbol::NT(var).to_str(prules.get_symbol_table()))
1353                };
1354                if prule.iter().any(|f| f.flags & ruleflag::L_FORM != 0) {
1355                    // We keep the L flag on the child of +* normalization if it's intended only for that normalization.
1356                    // For example:
1357                    // - A -> (b <L>)+ A | c
1358                    //   - doesn't have an l-form right recursion
1359                    //   - has an l-form repetition of b
1360                    // - A -> <L> (b)+ A | c
1361                    //   - has an l-form right recursion
1362                    //   - doesn't have an l-form repetition of b
1363                    //
1364                    // while let Some(parent) = prules.get_parent(nt) {
1365                    //     nt = parent;
1366                    // }
1367                    prules.set_flags(var, ruleflag::L_FORM);
1368                    // not really necessary, but cleaner:
1369                    for a in prule.iter_mut() {
1370                        a.flags &= !ruleflag::L_FORM;
1371                    }
1372                }
1373                // the children NTs that represent a part of the original NT are stored in ProdRuleSet::origin
1374                // rather than in the alts themselves
1375                if prules.parent[var as usize].is_some() {
1376                    if let Some(&(v, index)) = rules.origin.map.get(&(var, root)) {
1377                        prules.origin.add(var, (v, index));
1378                    }
1379                }
1380                prules.prules.push(prule);
1381            } else {
1382                prules.prules.push(ProdRule::new()); // empty
1383            }
1384        }
1385        prules.calc_num_symbols();
1386        prules
1387    }
1388}
1389
1390impl BuildFrom<RuleTreeSet<General>> for ProdRuleSet<General> {
1391    /// Builds a [`ProdRuleSet<General>`] from a [`RuleTreeSet<General>`].
1392    ///
1393    /// If an error is encountered or was already encountered before, an empty shell object
1394    /// is built with the log detailing the error(s).
1395    fn build_from(rules: RuleTreeSet<General>) -> Self {
1396        let mut prs = ProdRuleSet::build_from(RuleTreeSet::<Normalized>::build_from(rules));
1397        if prs.log.has_no_errors() {
1398            prs.simplify();
1399        }
1400        prs
1401    }
1402}
1403
1404impl BuildFrom<ProdRuleSet<General>> for ProdRuleSet<LL1> {
1405    fn build_from(mut rules: ProdRuleSet<General>) -> Self {
1406        if rules.log.has_no_errors() {
1407            rules.remove_recursion();
1408            rules.left_factorize();
1409            rules.transfer_alt_flags();
1410            rules.check_flags();
1411        }
1412        ProdRuleSet::<LL1> {
1413            prules: rules.prules,
1414            origin: rules.origin,
1415            num_nt: rules.num_nt,
1416            num_t: rules.num_t,
1417            symbol_table: rules.symbol_table,
1418            flags: rules.flags,
1419            parent: rules.parent,
1420            start: rules.start,
1421            name: rules.name,
1422            nt_conversion: rules.nt_conversion,
1423            log: rules.log,
1424            _phantom: PhantomData,
1425        }
1426    }
1427}
1428
1429impl BuildFrom<ProdRuleSet<General>> for ProdRuleSet<LR> {
1430    fn build_from(mut rules: ProdRuleSet<General>) -> Self {
1431        if rules.log.has_no_errors() {
1432            rules.remove_ambiguity();
1433            rules.transfer_alt_flags();
1434            rules.check_flags();
1435        }
1436        ProdRuleSet::<LR> {
1437            prules: rules.prules,
1438            origin: rules.origin,
1439            num_nt: rules.num_nt,
1440            num_t: rules.num_t,
1441            symbol_table: rules.symbol_table,
1442            flags: rules.flags,
1443            parent: rules.parent,
1444            start: rules.start,
1445            name: rules.name,
1446            nt_conversion: rules.nt_conversion,
1447            log: rules.log,
1448            _phantom: PhantomData,
1449        }
1450    }
1451}
1452
1453// ---------------------------------------------------------------------------------------------
1454// Supporting debug functions
1455
1456impl<T> ProdRuleSet<T> {
1457    pub fn print_rules(&self, as_comment: bool, filter_empty_nt: bool) {
1458        let prefix = if as_comment { "            // " } else { "    " };
1459        println!("{prefix}{}",
1460                 self.get_prules_iter()
1461                     .filter(|(_, rule)| !filter_empty_nt || **rule != prule!(e))
1462                     .map(|(var, p)|
1463                         format!("({var}) {} -> {}",
1464                                 Symbol::NT(var).to_str(self.get_symbol_table()),
1465                                 prule_to_str(p, self.get_symbol_table())))
1466                     .join(&format!("\n{prefix}")));
1467    }
1468
1469    pub fn print_alts(&self) {
1470        println!("Alternatives:\n{}",
1471                 self.get_alts().enumerate().map(|(id, (v, a))|
1472                     format!("    // - {id}: {} -> {}{}",
1473                             Symbol::NT(v).to_str(self.get_symbol_table()),
1474                             a.iter().map(|s| s.to_str(self.get_symbol_table())).join(" "),
1475                             if a.flags != 0 { format!("     {} ({})", ruleflag::to_string(a.flags).join(" | "), a.flags) } else { "".to_string() }
1476                     )
1477        ).join("\n"));
1478    }
1479}
1480
1481impl LLParsingTable {
1482    pub fn print(&self, symbol_table: Option<&SymbolTable>, indent: usize) {
1483        let LLParsingTable { num_nt, num_t, alts, table, .. } = self;
1484        let error_skip = alts.len() as AltId;
1485        let error_pop = error_skip + 1;
1486        let str_nt = (0..*num_nt).map(|i| Symbol::NT(i as VarId).to_str(symbol_table)).to_vec();
1487        let max_nt_len = str_nt.iter().map(|s| s.len()).max().unwrap();
1488        let str_t = (0..*num_t).map(|j| if j + 1 < *num_t { Symbol::T(j as VarId).to_str(symbol_table) } else { "$".to_string() }).to_vec();
1489        let t_len = str_t.iter().map(|s| s.len().max(3)).to_vec();
1490        println!("{:<i$}// {:<w$} | {}", "", "", (0..*num_t).map(|j| format!("{:^w$}", str_t[j], w = t_len[j])).join(" "), w = max_nt_len, i = indent);
1491        println!("{:<i$}// {:-<w$}-+-{:-<t$}", "", "", "", w = max_nt_len, t = *num_t + t_len.iter().sum::<usize>(), i = indent);
1492        for i in 0..*num_nt {
1493            print!("{:<i$}// {:<w$} |", "", str_nt[i], w = max_nt_len, i = indent);
1494            for j in 0..*num_t {
1495                let value = table[i * num_t + j];
1496                if value < error_skip {
1497                    print!(" {:^w$}", value, w = t_len[j]);
1498                } else {
1499                    print!(" {:^w$}", if value == error_pop { "p" } else { "." }, w = t_len[j]);
1500                }
1501            }
1502            println!();
1503        }
1504    }
1505}