lexigram_lib/grammar/
mod.rs

1// Copyright (c) 2025 Redglyph (@gmail.com). All Rights Reserved.
2
3pub mod tests;
4pub mod origin;
5mod prs;
6
7pub use prs::*;
8
9use std::collections::{HashMap, HashSet};
10use std::fmt::{Display, Formatter};
11use std::marker::PhantomData;
12use std::mem::take;
13use std::ops::{Deref, DerefMut};
14use iter_index::IndexerIterator;
15use vectree::VecTree;
16use lexigram_core::alt::ruleflag;
17use lexigram_core::CollectJoin;
18use crate::cproduct::CProduct;
19use crate::{alt, gnode, hashset, indent_source, prule, sym, vaddi, General, Normalized, TokenId, VarId, LL1, LR};
20use crate::fixed_sym_table::SymInfoTable;
21use crate::grammar::NTConversion::{MovedTo, Removed};
22use crate::grammar::origin::{FromPRS, FromRTS, Origin};
23use lexigram_core::log::{BufLog, LogReader, LogStatus, Logger};
24use crate::build::{BuildErrorSource, BuildFrom, HasBuildErrorSource};
25use crate::parser::Symbol;
26use crate::SymbolTable;
27
28#[derive(Clone, Copy, PartialEq, Debug)]
29pub enum GrNode {
30    Symbol(Symbol),
31    Concat,
32    Or,
33    Maybe,
34    Plus,
35    Star,
36    /// L-form attribute of an alternative or a `+` / `*` repetition expression.
37    /// - `+` and `*` expressions are either folded or iterative (also called "low latency", since the listener is
38    ///   called back immediately after parsing each item, whereas the folded form is only called once all the
39    ///   items have been parsed and gathered).
40    ///   - The default form is folded. With that form, the items of the repetitions are automatically gathered and handed
41    ///     to the listener callback as an array (if the items have a value) once all items have been parsed. For example,
42    ///     `A -> a (b)* c` gives a context with the values of a, b, c as variant
43    ///     `enum CtxA { A { a: String, b: Vec<String>, c: String } }`.
44    ///   - If the L-form is specified, the listener callback is called at each iteration, with a context giving the parsed
45    ///     items of that iteration which have a value. The NT used in that loop is defined with the L-form (`LForm(VarId)`),
46    ///     and its value serves as accumulator to fold all the successive items into a single value presented in the context
47    ///     of the alternative that includes the `+` or `*` repetition. For example, `A -> a (<L=AIter> b)* c` uses `AIter`,
48    ///     and each time a `b` value is parsed, the listener callback receives a context variant
49    ///     `enum CtxAIter { AIter1 { iter: SynAIter, b: String } }`. The callback must return the new `SynAIter` value.
50    ///     Once all the iterations are parsed, `c` is parsed, and the listener callback receives the context for `A`:
51    ///     `CtxA { A { a: String, star: SynAIter, c: String } }`.
52    /// - Right-recursive rules are either stacked or "low-latency".
53    ///   - The default form is stacked. A rule `A -> id A | stop` parsing "id1 id2 id3 stop1" yields a sequence
54    ///     - `A -> id1 A(1)`, `A(1) -> id2 A(2)`, `A(2) -> id3 A(3)`, `A(3) -> stop1`
55    ///
56    ///     Since it's recursive, the listener callback is first called for A(3), then A(2), A(1), and finally A. The parser
57    ///     puts the intermediate values of `id` on the stack, and once `stop1` is reached, it calls the callback with it,
58    ///     then unstacks all the `id` values for the successive callbacks with `id = id3`, `id2`, and finally `id1`,
59    ///     together with the loop value `A`, which is updated each time by the callback.
60    ///   - The low-latency form, whose `VarId` points to its own NT, calls the listener callback at each iteration and
61    ///     doesn't accumulate values on the stack. In the example above, it's first called with `id = id1`, `id2`, `id3`,
62    ///     and finally `stop = stop1`, together with the loop value `A`, which is updated each time by the callback.
63    LForm(VarId),   // applied to NT
64    RAssoc,         // applied to alternative, right-associative
65    PrecEq,         // applied to alternative, same precedence as previous alternative
66    Instance,       // instance of * or + in reference origin trees
67}
68
69impl Display for GrNode {
70    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
71        match self {
72            GrNode::Symbol(s) => write!(f, "{s}"),
73            GrNode::Concat => write!(f, "&"),
74            GrNode::Or => write!(f, "|"),
75            GrNode::Maybe => write!(f, "?"),
76            GrNode::Plus => write!(f, "+"),
77            GrNode::Star => write!(f, "*"),
78            GrNode::LForm(v) => write!(f, "<L={v}>"),
79            GrNode::RAssoc => write!(f, "<R>"),
80            GrNode::PrecEq => write!(f, "<P>"),
81            GrNode::Instance => write!(f, "inst "),
82        }
83    }
84}
85
86impl GrNode {
87    pub fn to_str(&self, symbol_table: Option<&SymbolTable>) -> String {
88        match self {
89            GrNode::Symbol(s) => symbol_table.map(|t| t.get_str(s)).unwrap_or(s.to_string()),
90            GrNode::LForm(v) => format!("<L={}>", symbol_table.map(|t| t.get_str(&Symbol::NT(*v))).unwrap_or(v.to_string())),
91            _ => self.to_string()
92        }
93    }
94
95    pub fn gen_source_code(&self) -> String {
96        match self {
97            GrNode::Symbol(s) => format!("gnode!({})", s.to_macro_item()),
98            GrNode::Concat    => "gnode!(&)".to_string(),
99            GrNode::Or        => "gnode!(|)".to_string(),
100            GrNode::Maybe     => "gnode!(?)".to_string(),
101            GrNode::Plus      => "gnode!(+)".to_string(),
102            GrNode::Star      => "gnode!(*)".to_string(),
103            GrNode::LForm(v)  => format!("gnode!(L {v})"),
104            GrNode::RAssoc    => "gnode!(R)".to_string(),
105            GrNode::PrecEq    => "gnode!(P)".to_string(),
106            GrNode::Instance  => "gnode!(inst)".to_string(),
107        }
108    }
109
110    pub fn is_modifier(&self) -> bool {
111        matches!(self, GrNode::LForm(_) | GrNode::RAssoc | GrNode::PrecEq)
112    }
113
114    pub fn is_empty(&self) -> bool {
115        matches!(self, GrNode::Symbol(Symbol::Empty))
116    }
117}
118
119// ---------------------------------------------------------------------------------------------
120
121/// Simple index object that returns `Original(<value>)` on the first `index.get()`, then
122/// `Copy(<value>)` on subsequent calls. The indices are stored on 31 bits, keeping one bit
123/// for the 'original' flag. Trying to store larger values triggers a panic.
124#[derive(Clone, Copy)]
125pub struct Dup {
126    index: u32
127}
128
129#[derive(Clone, Copy, Debug)]
130enum DupVal {
131    Original(u32),
132    Copy(u32)
133}
134
135impl Dup {
136    const MASK: u32 = 1 << (u32::BITS - 1);
137
138    fn new(index: usize) -> Self {
139        assert!(index < Self::MASK as usize);
140        Self { index: index as u32 }
141    }
142
143    fn get(&mut self) -> DupVal {
144        let idx = self.index;
145        if idx & Self::MASK == 0 {
146            self.index |= Self::MASK;
147            DupVal::Original(idx)
148        } else {
149            DupVal::Copy(idx & !Self::MASK)
150        }
151    }
152}
153
154impl std::fmt::Debug for Dup {
155    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
156        write!(f, "Dup{{")?;
157        if self.index & Self::MASK == 0 {
158            write!(f, "{}}}", self.index)
159        } else {
160            write!(f, "copy {}}}", self.index & !Dup::MASK)
161        }
162    }
163}
164
165// ---------------------------------------------------------------------------------------------
166
167pub type GrTree = VecTree<GrNode>;
168
169/// Builds the string representation of the [`GrTree`], using the [`symbol table`](SymbolTable) if available
170/// and optionally starting at node `node`. If `emphasis` contains a node ID, this subpart of the tree
171/// is emphasized in the string.
172pub fn grtree_to_str_custom(tree: &GrTree, node: Option<usize>, emphasis: Option<usize>, nt: Option<VarId>, symbol_table: Option<&SymbolTable>, simplified: bool, ansi: bool)
173                            -> String
174{
175    fn pr_join(children: Vec<(u32, String)>, str: &str, pr: u32) -> (u32, String) {
176        (pr, children.into_iter()
177            .map(|(p_ch, ch)| if p_ch >= pr { ch } else { format!("({ch})") })
178            .join(str))
179    }
180
181    fn pr_append(child: (u32, String), str: &str, pr: u32) -> (u32, String) {
182        (pr, if child.0 >= pr { format!("{}{str}", child.1) } else { format!("({}){str}", child.1) })
183    }
184
185    const PR_PROD: u32 = 1;
186    const PR_TERM: u32 = 2;
187    const PR_FACTOR: u32 = 3;
188    const PR_ATOM: u32 = 4;
189
190    const BEFORE: &str = " ►► ";
191    const AFTER : &str = " ◄◄ ";
192    const BEFORE_ANSI: &str = "\u{1b}[4;1;36m";
193    const AFTER_ANSI : &str = "\u{1b}[0m";
194
195    let mut children = vec![];
196    if tree.is_empty() {
197        return "<empty>".to_string();
198    }
199    let before = if ansi { BEFORE_ANSI } else { BEFORE };
200    let after = if ansi { AFTER_ANSI } else { AFTER };
201    let top = node.unwrap_or_else(|| tree.get_root().unwrap());
202    for node in tree.iter_depth_simple_at(top) {
203        let (pr, mut str) = match node.num_children() {
204            0 => {
205                match node.deref() {
206                    GrNode::Symbol(s) => (PR_ATOM, s.to_str_quote(symbol_table)),
207                    GrNode::LForm(var) => (PR_ATOM, if simplified || nt == Some(*var) { "<L>".to_string() } else { format!("<L={}>", Symbol::NT(*var).to_str(symbol_table)) }),
208                    GrNode::RAssoc => (PR_ATOM, "<R>".to_string()),
209                    GrNode::PrecEq => (PR_ATOM, "<P>".to_string()),
210                    s => panic!("{s:?} should have children"),
211                }
212            }
213            n => {
214                let mut node_children = children.split_off(children.len() - n);
215                match node.deref() {
216                    GrNode::Concat => pr_join(node_children, " ", PR_TERM),
217                    GrNode::Or => pr_join(node_children, " | ", PR_PROD),
218                    GrNode::Maybe => pr_append(node_children.pop().unwrap(), "?", PR_FACTOR),
219                    GrNode::Plus => pr_append(node_children.pop().unwrap(), "+", PR_FACTOR),
220                    GrNode::Star => pr_append(node_children.pop().unwrap(), "*", PR_FACTOR),
221                    GrNode::Instance => pr_join(node_children, " ", PR_FACTOR),
222                    s => panic!("{s:?} shouldn't have {n} child(ren)"),
223                }
224            }
225        };
226        if Some(node.index) == emphasis {
227            str = format!("{before}{str}{after}");
228        }
229        children.push((pr, str));
230    }
231    children.pop().unwrap().1
232}
233
234pub fn grtree_to_str(tree: &GrTree, node: Option<usize>, emphasis: Option<usize>, var: Option<VarId>, symbol_table: Option<&SymbolTable>, simplified: bool) -> String {
235    grtree_to_str_custom(tree, node, emphasis, var, symbol_table, simplified, false)
236}
237
238pub fn grtree_to_str_ansi(tree: &GrTree, node: Option<usize>, emphasis: Option<usize>, var: Option<VarId>, symbol_table: Option<&SymbolTable>, simplified: bool) -> String {
239    grtree_to_str_custom(tree, node, emphasis, var, symbol_table, simplified, true)
240}
241
242/// Cleans the empty symbols in a normalized tree. Removes empty terms if `del_empty_terms` is true.
243///
244/// Returns `Some((is_empty, had_empty_term))` for normalized trees, where
245/// * `is_empty` = true if only ε remains
246/// * `had_empty_term` = true if the tree had an empty term (was of the form `α | ε`)
247///
248/// If the top of the tree isn't a symbol, a `&`, or & `|`, the function doesn't process the tree
249/// and returns `None`. If something else than those 3 types of nodes is met inside the tree, it's
250/// simply ignored.
251///
252/// The modifiers `<L>`, `<R>`, or `<P>` alone(s) with `ε` in a term will be simplified, but not
253/// if there are other items in the term:
254///
255/// ```text
256/// del_empty_terms: true     false
257///                  ----     -----
258/// a | <L> ε   =>   a        a | ε
259/// a | <R>     =>   a        a | ε
260/// <P> ε a     =>   <P> a    <P> a
261/// ```
262fn grtree_cleanup(tree: &mut GrTree, top: Option<usize>, del_empty_term: bool) -> Option<(bool, bool)> {
263    const VERBOSE: bool = false;
264    let root = top.unwrap_or_else(|| tree.get_root().unwrap());
265    let mut had_empty_term = false;
266    let (terms, is_or) = match tree.get(root) {
267        GrNode::Symbol(s) => {
268            let is_empty = *s == Symbol::Empty;
269            return Some((is_empty, is_empty));
270        }
271        GrNode::Concat => (vec![root], false),
272        GrNode::Or => (tree.children(root).to_owned(), true),
273        // we don't handle those cases:
274        GrNode::Maybe | GrNode::Plus | GrNode::Star | GrNode::LForm(_)
275        | GrNode::RAssoc | GrNode::PrecEq | GrNode::Instance => { return None }
276    };
277    let terms_len = terms.len();
278    let mut empty_terms = vec![];
279    for (term_pos, term) in terms.into_iter().enumerate() {
280        match *tree.get(term) {
281            GrNode::Concat => {
282                let children = tree.children(term);
283                let len = children.len();
284                let mut empty_pos = vec![];
285                let n_modifiers = children.iter().enumerate()
286                    .fold(0, |n_mod, (pos, &index)| {
287                        let n = tree.get(index);
288                        if n.is_empty() {
289                            empty_pos.push(pos);
290                        }
291                        n_mod + if n.is_modifier() { 1 } else { 0 }
292                    });
293                if VERBOSE { print!("- term {}  => {empty_pos:?} empty, {n_modifiers} modifier", tree.to_str_index(Some(term), None)); }
294                if empty_pos.len() + n_modifiers == len {
295                    *tree.get_mut(term) = gnode!(e);
296                    tree.children_mut(term).clear();
297                    empty_terms.push(term_pos);
298                    if VERBOSE { println!(" (replacing everything with ε)"); }
299                } else if !empty_pos.is_empty() {
300                    if VERBOSE { println!("  => (removing {} ε)", empty_pos.len()); }
301                    let new_children = tree.children_mut(term);
302                    for i in empty_pos.into_iter().rev() {
303                        new_children.remove(i);
304                    }
305                } else {
306                    if VERBOSE { println!(" (nothing to do)"); }
307                }
308            }
309            GrNode::Symbol(Symbol::Empty) => {
310                empty_terms.push(term_pos);
311            }
312            n if n.is_modifier() => {   // lone modifier
313                *tree.get_mut(term) = gnode!(e);
314                empty_terms.push(term_pos);
315            }
316            _ => {}
317        }
318    }
319    if VERBOSE { println!("  {} empty terms: {empty_terms:?}", empty_terms.len()); }
320    if !empty_terms.is_empty() {
321        had_empty_term = true;
322        if is_or {
323            if !del_empty_term && terms_len > 1 || empty_terms.len() == terms_len {
324                empty_terms.pop();
325            }
326            let or_children = tree.children_mut(root);
327            for i in empty_terms.into_iter().rev() {
328                or_children.remove(i);
329            }
330            if VERBOSE { println!("or_children => {or_children:?}"); }
331        }
332    }
333    if is_or && tree.children(root).len() == 1 {
334        // removes the top | if it has only one child (not entirely necessary but simplifies the rest)
335        let subroot_index = tree.children(root)[0];
336        let subroot = *tree.get(subroot_index);
337        let subroot_children = tree.children(subroot_index).to_owned();
338        *tree.get_mut(root) = subroot;
339        *tree.children_mut(root) = subroot_children;
340    }
341    let is_empty = match tree.get(root) {
342        GrNode::Symbol(s) => s.is_empty(),
343        GrNode::Concat => false, // an empty `&` is simplified to `ε` above // matches!(tree.children(root), &[i] if tree.get(i).is_empty()),
344        GrNode::Or => false, // an empty `|` is simplified to `ε` above
345        GrNode::Maybe | GrNode::Plus | GrNode::Star | GrNode::LForm(_) | GrNode::RAssoc | GrNode::PrecEq | GrNode::Instance => false,
346    };
347    Some((is_empty, had_empty_term))
348}
349
350/// Removes duplicate 'ε' symbols in `nodes` alts before adding them as children in a `&`.
351///
352/// ## Examples:
353/// * `ε ε` -> `ε`
354/// * `ε A` -> `A`
355/// * `ε` -> `ε`
356fn remove_concat_dup_empty(tree: &GrTree, nodes: &mut Vec<usize>) {
357    if nodes.len() > 1 {
358        let mut i = 0;
359        while i < nodes.len() && nodes.len() > 1 {
360            if tree.get(nodes[i]).is_empty() {
361                nodes.remove(i);
362            } else {
363                i += 1;
364            }
365        }
366    }
367}
368
369/// Adds methods to GrTree.
370///
371/// _NOTE: We must create a trait for GrTree since we can't implement functions for an external type,
372/// and a type alias is not considered as a new type._
373pub trait GrTreeExt {
374    fn get_dup(&mut self, dup_index: &mut Dup) -> usize;
375    fn to_str(&self, start_node: Option<usize>, symbol_table: Option<&SymbolTable>) -> String;
376    fn to_str_index(&self, start_node: Option<usize>, symbol_table: Option<&SymbolTable>) -> String;
377}
378
379impl GrTreeExt for GrTree {
380    fn get_dup(&mut self, dup_index: &mut Dup) -> usize {
381        match dup_index.get() {
382            DupVal::Original(index) => index as usize,
383            DupVal::Copy(index) => {
384                let node = self.get(index as usize).clone();
385                self.add(None, node)
386            }
387        }
388    }
389
390    fn to_str(&self, start_node: Option<usize>, symbol_table: Option<&SymbolTable>) -> String {
391        let tfmt = GrTreeFmt {
392            tree: &self,
393            show_ids: false,
394            show_depth: false,
395            symbol_table,
396            start_node,
397        };
398        tfmt.to_string()
399    }
400
401    fn to_str_index(&self, start_node: Option<usize>, symbol_table: Option<&SymbolTable>) -> String {
402        let tfmt = GrTreeFmt {
403            tree: &self,
404            show_ids: true,
405            show_depth: false,
406            symbol_table,
407            start_node,
408        };
409        tfmt.to_string()
410    }
411}
412
413pub struct GrTreeFmt<'a> {
414    tree: &'a GrTree,
415    show_ids: bool,
416    show_depth: bool,
417    symbol_table: Option<&'a SymbolTable>,
418    start_node: Option<usize>
419}
420
421impl<'a> GrTreeFmt<'a> {
422    fn snode(&self, node: &GrNode, node_id: usize, depth: u32) -> String {
423        let show_ids = self.show_ids;
424        let show_depth = self.show_depth;
425        let mut result = String::new();
426        if show_depth {
427            result.push_str(&depth.to_string());
428            result.push('>');
429        }
430        if show_ids {
431            result.push_str(&node_id.to_string());
432            result.push(':');
433        }
434        let name = if let GrNode::Symbol(sym) = node {
435            if self.symbol_table.is_some() { sym.to_str_quote(self.symbol_table) } else { sym.to_str(self.symbol_table) }
436        } else {
437            node.to_str(self.symbol_table)
438        };
439        result.push_str(name.as_str());
440        result
441    }
442}
443
444impl Display for GrTreeFmt<'_> {
445    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
446        if self.tree.is_empty() {
447            return write!(f, "<empty>");
448        }
449        let start_node = self.start_node.unwrap_or_else(|| self.tree.get_root().expect("the tree must have a defined root"));
450        let mut stack = Vec::<String>::new();
451        for node in self.tree.iter_depth_at(start_node) {
452            let n = node.num_children();
453            if n > 0 {
454                let children = stack.drain(stack.len() - n..).join(", ");
455                stack.push(format!("{}({children})", self.snode(&node, node.index, node.depth)));
456            } else {
457                stack.push(self.snode(&node, node.index, node.depth));
458            }
459        }
460        write!(f, "{}", stack.pop().unwrap_or_else(|| "empty".to_string()))
461    }
462}
463
464// ---------------------------------------------------------------------------------------------
465
466#[derive(Clone, Copy, PartialEq, Debug)]
467pub enum NTConversion {
468    /// Removed because not used
469    Removed,
470    /// + or * child moved to L-form holder
471    MovedTo(VarId)
472}
473
474#[derive(Clone, Debug)]
475pub struct RuleTreeSet<T> {
476    trees: Vec<GrTree>,
477    start: Option<VarId>,
478    symbol_table: Option<SymbolTable>,
479    flags: Vec<u32>,            // NT -> flags (+ or * normalization)
480    parent: Vec<Option<VarId>>, // NT -> parent NT
481    nt_conversion: HashMap<VarId, NTConversion>,
482    origin: Origin<(VarId, usize), FromRTS>,
483    log: BufLog,
484    _phantom: PhantomData<T>
485}
486
487impl<T> HasBuildErrorSource for RuleTreeSet<T> {
488    const SOURCE: BuildErrorSource = BuildErrorSource::RuleTreeSet;
489}
490
491// Methods for both General and Normalized forms. There can only be immutable methods
492// in the normalized form.
493impl<T> RuleTreeSet<T> {
494    pub fn get_num_nt(&self) -> VarId {
495        self.trees.len() as VarId
496    }
497    
498    pub fn get_tree(&self, var: VarId) -> Option<&GrTree> {
499        self.trees.get(var as usize)
500    }
501
502    /// Returns all the non-empty trees
503    pub fn get_trees_iter(&self) -> impl Iterator<Item=(VarId, &GrTree)> {
504        self.trees.iter().index().filter_map(|(id, t)| if t.is_empty() { None } else { Some((id, t)) })
505    }
506
507    /// Returns all the variables corresponding to a non-empty tree
508    pub fn get_vars(&self) -> impl Iterator<Item=VarId> + '_ {
509        (0..self.trees.len()).filter_map(|id| if self.trees[id].is_empty() { None } else { Some(id as VarId) })
510    }
511
512    /// Returns a variable ID that doesn't exist yet.
513    pub fn get_next_available_var(&self) -> VarId {
514        self.trees.len() as VarId
515    }
516
517    /// Returns a set of all the terminals used in the ruleset.
518    pub fn get_terminals(&self) -> HashSet<TokenId> {
519        let tset = self.trees.iter()
520            .flat_map(|t| t.iter_depth_simple())
521            .filter_map(|x| if let GrNode::Symbol(Symbol::T(t)) = x.deref() { Some(*t) } else { None })
522            .collect::<HashSet<_>>();
523        tset
524    }
525
526    pub fn set_symbol_table(&mut self, symbol_table: SymbolTable) {
527        self.symbol_table = Some(symbol_table);
528    }
529
530    pub fn get_symbol_table(&self) -> Option<&SymbolTable> {
531        self.symbol_table.as_ref()
532    }
533
534    /// Sets the starting production rule.
535    pub fn set_start(&mut self, start: VarId) {
536        self.start = Some(start);
537    }
538
539    /// Builds the string representation of the [`rule tree set`](RuleTreeSet) of variable `var`,
540    /// optionally starting at node `node`. If `emphasis` contains a node ID, this subpart of the
541    /// tree is emphasized in the string.
542    pub fn to_str(&self, var: VarId, node: Option<usize>, emphasis: Option<usize>) -> String {
543        grtree_to_str(&self.trees[var as usize], node, emphasis, Some(var), self.get_symbol_table(), false)
544    }
545
546    pub fn get_log_mut(&mut self) -> &mut BufLog {
547        &mut self.log
548    }
549}
550
551impl<T> LogReader for RuleTreeSet<T> {
552    type Item = BufLog;
553
554    fn get_log(&self) -> &Self::Item {
555        &self.log
556    }
557
558    fn give_log(self) -> Self::Item {
559        self.log
560    }
561}
562
563// Mutable methods for the General form.
564impl RuleTreeSet<General> {
565    pub fn new() -> Self {
566        Self::with_log(BufLog::new())
567    }
568
569    pub fn with_log(log: BufLog) -> Self {
570        RuleTreeSet {
571            trees: Vec::new(),
572            start: None,
573            symbol_table: None,
574            flags: Vec::new(),
575            parent: Vec::new(),
576            nt_conversion: HashMap::new(),
577            origin: Origin::new(),
578            log,
579            _phantom: PhantomData,
580        }
581    }
582
583    /// Gets the tree corresponding to `var`. Creates it if it doesn't exist yet.
584    pub fn get_tree_mut(&mut self, var: VarId) -> &mut GrTree {
585        let var = var as usize;
586        if var >= self.trees.len() {
587            self.trees.resize(var + 1, GrTree::new());
588        }
589        &mut self.trees[var]
590    }
591
592    /// Sets the tree corresponding to `var`. If the variable already exists,
593    /// the tree is replaced. Otherwise, the set is enlarged to add it.
594    pub fn set_tree(&mut self, var: VarId, tree: GrTree) {
595        let var = var as usize;
596        if var >= self.trees.len() {
597            if var > self.trees.len() {
598                if self.trees.capacity() < var + 1 {
599                    // if capacity = 2, var = 3 => we need 4, so 2 more
600                    self.trees.reserve(var + 1 - self.trees.capacity())
601                }
602                self.trees.resize(var, GrTree::new());
603            }
604            self.trees.push(tree);
605        } else {
606            self.trees[var] = tree;
607        }
608    }
609
610    /// Normalizes all the production rules.
611    pub fn normalize(&mut self) {
612        self.log.add_note("original rules:");
613        (0..self.trees.len() as VarId)
614            .for_each(|v| self.log.add_note(format!("- NT[{v:2}] {} -> {}", Symbol::NT(v).to_str(self.get_symbol_table()), self.to_str(v, None, None))));
615        self.log.add_note("normalizing rules...");
616        self.check_num_nt_coherency();
617        self.normalize_vars();
618        self.flags.resize(self.trees.len(), 0);
619        self.parent.resize(self.trees.len(), None);
620    }
621
622    fn check_num_nt_coherency(&mut self) {
623        if let Some(n) = self.symbol_table.as_ref().and_then(|table| Some(table.get_num_nt())) {
624            if n != self.trees.len() {
625                self.log.add_error(format!("there are {} rules but the symbol table has {n} nonterminal symbols: dropping the table", self.trees.len()));
626                self.symbol_table = None;
627            }
628        }
629    }
630
631    /// Transforms the production rule tree into a list of rules in normalized format:
632    /// `var -> &(leaf_1, leaf_2, ...leaf_n)`
633    ///
634    /// The product may have to be split if operators like `+` or `*` are used. In this
635    /// case, new nonterminals are created, with increasing IDs starting from
636    /// `new_var`.
637    fn normalize_vars(&mut self) {
638        const VERBOSE: bool = false;
639        const VERBOSE_CC: bool = false;
640        let mut exclude_nt = HashSet::<VarId>::new();
641        for var in 0..self.get_num_nt() {
642            if exclude_nt.contains(&var) {
643                if VERBOSE { println!("NT[{var}] {} excluded", Symbol::NT(var).to_str(self.get_symbol_table())); }
644                continue
645            }
646            if VERBOSE { println!("normalize_var(NT[{var}] {})", Symbol::NT(var).to_str(self.get_symbol_table())); }
647            let mut new_var = self.get_next_available_var();
648            let orig = take(&mut self.trees[var as usize]);
649            let mut new = GrTree::new();
650            let mut orig_new = GrTree::new();
651            let mut orig_rep_vars = HashMap::<VarId, usize>::new();
652            let mut stack = Vec::<usize>::new();    // indices in new
653            for sym in orig.iter_depth() {
654                let n = sym.num_children();
655                if VERBOSE { println!("- old {}:{}", sym.index, *sym); }
656                if n == 0 {
657                    let new_id = new.add(None, orig.get(sym.index).clone());
658                    stack.push(new_id);
659                    if VERBOSE { print!("  leaf: "); }
660                } else {
661                    match *sym {
662                        // we must rearrange the operations so that any item on the stack is only
663                        // one of those patterns:
664                        // - a leaf
665                        // - a &(leaves)
666                        // - a |(&(leaves) or leaves)
667                        GrNode::Concat | GrNode::Or => {
668                            let children = stack.split_off(stack.len() - n);
669                            let new_id = if children.iter().all(|&idx| !matches!(new.get(idx), GrNode::Concat|GrNode::Or)) {
670                                if VERBOSE { print!("  trivial {}: children={}\n  ", *sym, children.iter().map(|s| new.get(*s).to_str(self.get_symbol_table())).join(", ")); }
671                                // trivial case with only leaves as children (could be removed and treated as a general case)
672                                new.addci_iter(None, sym.clone(), children)
673                            } else {
674                                if *sym == GrNode::Or {
675                                    if VERBOSE {
676                                        // println!("  or: children={}", children.iter().map(|&id| format!("{id}:{}", grtree_to_str(&new, Some(id), None, self.get_symbol_table()))).join(", "));
677                                        println!("  or: children={}", children.iter().map(|&id| format!("{}", new.to_str_index(Some(id), self.get_symbol_table()))).join(", "));
678                                    }
679                                    // if parent sym is p:|
680                                    // - preserving the children's order:
681                                    //   - attach '|' children's children directly under p (discarding the '|' children)
682                                    //   - attach '&' children under p
683                                    // - push p back to stack
684                                    // ex: P: AB | (C|D) | E | (FG|HI)             -> P: AB | C | D | E | FG | HI
685                                    //        |(&(A,B),|(C,D),E,|(&(F,G),&(H,I)))        |(&(A,B),C,D,E,&(F,G),&(H,I))
686                                    let mut new_children = Vec::new();
687                                    for id in children {
688                                        match new.get(id) {
689                                            GrNode::Symbol(_) | GrNode::Concat => {
690                                                if VERBOSE { println!("  - child {id} is {}", new.get(id)); }
691                                                new_children.push(id);
692                                            }
693                                            GrNode::Or => {
694                                                if VERBOSE { println!("  - child {id} is | with children {:?}", new.children(id)); }
695                                                new_children.extend(new.children(id));
696                                            }
697                                            x => panic!("unexpected node type under | node: {x}"),
698                                        }
699                                    }
700                                    new.addci_iter(None, gnode!(|), new_children)
701                                } else { // *sym == GrNode::Concat
702                                    if VERBOSE_CC { println!("  &: children={children:?}"); }
703                                    // if parent sym is p:&
704                                    // - merge adjacent leaves and '&' children (optional)
705                                    // - cartesian product of all '|' children's children and '&' children,
706                                    //       duplicating nodes are required
707                                    // - add r:'|' node to tree, attaching the new '&' nodes under it
708                                    // - push r to stack
709                                    // ex: P: AB & (C|D) & E & (FG|H)        -> P: ABCEFG | ABCEH | ABDEFG | ABDEH
710                                    //        &(&(A,B),|(C,D),E,|(&(F,G),H))      |(&(A,B,C,E,F,G),&(A,B,C,E,H),&(A,B,D,E,F,G),&(A,B,D,E,H)
711
712                                    // we store the dups in an array and reference them by index, because there will be multiple instances
713                                    // pointing to the same Dup and we can't do that with mutable references (which must be unique):
714                                    let mut dups = Vec::<Vec<Dup>>::new();
715                                    let concats_children = children.into_iter()
716                                        // iterations: &(A,B) -> |(C,D) -> E -> |(&(F,G),H))
717                                        .flat_map(|id| {
718                                            if VERBOSE_CC { print!("      FL {}: ", new.get(id)); }
719                                            match new.get(id) {
720                                                GrNode::Concat =>
721                                                    new.children(id).iter().map(|idc| vec![vaddi(&mut dups, [Dup::new(*idc)])]).to_vec(),
722                                                GrNode::Or => {
723                                                    let children = new.children(id).to_vec();
724                                                    vec![children.into_iter().map(|idc| {
725                                                        if let GrNode::Concat = new.get(idc) {
726                                                            let idc_children = new.children(idc).iter().map(|i| Dup::new(*i)).to_vec();
727                                                            vaddi(&mut dups, idc_children)
728                                                        } else {
729                                                            vaddi(&mut dups, [Dup::new(idc)])
730                                                        }
731                                                    }).to_vec()]
732                                                }
733                                                _ => vec![vec![vaddi(&mut dups, [Dup::new(id)])]],
734                                            }
735                                        })
736                                        // [d(A)] -> [d(B)] -> [d(C),d(D)] -> [d(E)] -> [d(&(d(F),d(G))),d(H)]
737                                        // .inspect(|x| println!("      >> {}", x.iter().map(|i| format!("_{i}")).join(", ")))
738                                        .cproduct()
739                                        // .inspect(|x| println!("      << {}", x.iter().map(|i| format!("_{i}")).join(", ")))
740                                        // [dup(A),dup(B),dup(C),dup(E),d(&)] -> [dup(A),dup(B),dup(C),dup(E),d(H)] ->
741                                        //       [dup(A),dup(B),dup(D),dup(E),d(&)] -> [dup(A),dup(B),dup(D),dup(E),d(H)]
742                                        .map(|dup_ids| {
743                                            let mut nodes = dup_ids.into_iter()
744                                                .flat_map(|dup_id| dups.get_mut(dup_id).unwrap().iter_mut()
745                                                    .map(|dup| new.get_dup(dup)).to_vec()).to_vec();
746                                            remove_concat_dup_empty(&new, &mut nodes);
747                                            nodes
748                                        })
749                                        // .inspect(|x| println!("      :: {}", x.iter().map(|i| format!("{i}")).join(", ")))
750                                        .to_vec();
751                                    // [A,B,C,E,F,G] -> [A',B',C',E',H] -> [A'',B'',D,E'',F',G'] -> [A''',B''',D',E''',H']
752                                    let concats = concats_children.into_iter()
753                                        .map(|children_ids| new.addci_iter(None, gnode!(&), children_ids))
754                                        .to_vec();
755                                    // Vec<node id of &-branch>
756                                    new.addci_iter(None, gnode!(|), concats)
757                                }
758                            };
759                            stack.push(new_id);
760                        }
761                        GrNode::Maybe => {
762                            if n != 1 {
763                                self.log.add_error(format!("normalize_var({}): ? should only have one child; found {n}: {}",
764                                                           Symbol::NT(var).to_str(self.get_symbol_table()),
765                                                           orig.to_str(Some(sym.index), self.get_symbol_table())));
766                            } else {
767                                // self              new
768                                // -------------------------------
769                                // ?(A)           -> |(A,ε)
770                                // ?(&(A,B))      -> |(&(A,B),ε)
771                                // ?(|(&(A,B),C)) -> |(&(A,B),C,ε)
772                                if VERBOSE { print!("  ?: "); }
773                                let maybe_child = stack.pop().unwrap();
774                                let proceed = match grtree_cleanup(&mut new, Some(maybe_child), true) {
775                                    None => {
776                                        self.log.add_error(format!(
777                                            "unexpected child of ?: {} (should be &, |, or symbol)",
778                                            grtree_to_str(&new, Some(maybe_child), None, Some(var), self.get_symbol_table(), false)));
779                                        if VERBOSE { println!("ERROR: unexpected child of ?: {}", grtree_to_str(&new, Some(maybe_child), None, Some(var), self.get_symbol_table(), false)); }
780                                        return;
781                                    }
782                                    // (is_empty, had_empty_term)
783                                    Some((true, _)) => {
784                                        // the child is `ε`
785                                        stack.push(maybe_child);
786                                        if VERBOSE { println!("child of ? simplified to ε"); }
787                                        false
788                                    }
789                                    _ => true,
790                                };
791                                if proceed {
792                                    let empty = new.add(None, gnode!(e));
793                                    let id = match new.get(maybe_child) {
794                                        GrNode::Or => {
795                                            new.add(Some(maybe_child), gnode!(e));
796                                            maybe_child
797                                        }
798                                        _ => new.addci_iter(None, gnode!(|), [maybe_child, empty])
799                                    };
800                                    stack.push(id);
801                                }
802                            }
803                        }
804                        GrNode::Plus | GrNode::Star => {
805                            // + can change to *, so we treat both of them at the same time
806                            //
807                            // P -> αβ+γ becomes P -> αQγ
808                            //                   Q -> βQ | β
809                            //
810                            //     self              new  new(Q=next_var_id)               simpler format
811                            //     ----------------------------------------------------------------------------------------
812                            //     +(A)           -> Q    |(&(A,Q), A')                    AQ|A
813                            //     +(&(A,B))      -> Q    |(&(A,B,Q),&(A',B'))             ABQ|AB
814                            //     +(|(&(A,B),C)) -> Q    |(&(A,B,Q),&(C,Q'),&(A',B'),C')  (AB|C)Q | (AB|C) = ABQ|CQ | AB|C
815                            //
816                            // P -> αβ*γ becomes P -> αQγ
817                            //                   Q -> βQ | ε
818                            //
819                            //     self              new  new(Q=next_var_id)     simpler format
820                            //     -----------------------------------------------------------------------
821                            //     *(A)           -> Q    |(&(A,Q), ε)           AQ|ε
822                            //     *(&(A,B))      -> Q    |(&(A,B,Q),ε)          ABQ|ε
823                            //     *(|(&(A,B),C)) -> Q    |(&(A,B,Q),&(C,Q'),ε)  (AB|C)Q | ε = ABQ|CQ | ε
824                            let mut is_plus = *sym == GrNode::Plus;
825                            let sym_char = if is_plus { '+' } else { '*' };
826                            if VERBOSE { print!("  {sym_char}: "); }
827                            if n != 1 {
828                                self.log.add_error(format!(
829                                    "normalize_var({}): {sym_char} should only have one child; found {n}: {}",
830                                    Symbol::NT(var).to_str(self.get_symbol_table()),
831                                    orig.to_str(Some(sym.index), self.get_symbol_table())));
832                                if VERBOSE { println!("ERROR: found {n} children instead of 1"); }
833                                return;
834                            }
835                            let rep_child = stack.pop().unwrap();
836                            if VERBOSE {}
837                            let proceed = match grtree_cleanup(&mut new, Some(rep_child), true) {
838                                None => {
839                                    self.log.add_error(format!(
840                                        "unexpected child of {sym_char}: {} (should be &, |, or symbol)",
841                                        grtree_to_str(&new, Some(rep_child), None, Some(var), self.get_symbol_table(), false)));
842                                    if VERBOSE { println!("ERROR: unexpected child {}", grtree_to_str(&new, Some(rep_child), None, Some(var), self.get_symbol_table(), false)); }
843                                    return;
844                                }
845                                // (is_empty, had_empty_term)
846                                Some((true, _)) => {
847                                    // the child is `ε`
848                                    stack.push(rep_child);
849                                    if VERBOSE { println!("child simplified to ε"); }
850                                    false
851                                }
852                                Some((false, true)) => {
853                                    // the child had the form `α + ε` and is now `α`, so if the operator was +,
854                                    // it must become * since empty must remain a possibility (it doesn't change
855                                    // (anything if it was already *)
856                                    if is_plus {
857                                        is_plus = false;
858                                        // sym_char = '*';
859                                        if VERBOSE { print!(" becomes * (child lost ε term), "); }
860                                    }
861                                    true
862                                }
863                                Some((false, false)) => true, // nothing special, processing below
864                            };
865                            if proceed {
866                                if VERBOSE { println!("=> {}", grtree_to_str(&new, Some(rep_child), None, Some(var), self.get_symbol_table(), false)); }
867                                let orig_rep = orig_new.add(None, if is_plus { gnode!(+) } else { gnode!(*) });
868                                let orig_rep_child = orig_new.add_from_tree(Some(orig_rep), &new, Some(rep_child));
869                                let (id, qvar) = self.normalize_plus_or_star(
870                                    rep_child, orig_rep, orig_rep_child, &mut new, &mut orig_new, var, &mut new_var, is_plus, &mut exclude_nt);
871                                stack.push(id);
872                                orig_rep_vars.insert(qvar, orig_rep); // to replace later
873                            }
874                        }
875                        _ => panic!("Unexpected {}", sym.deref())
876                    }
877                }
878                if VERBOSE {
879                    println!("stack: {}", stack.iter()
880                        .map(|id| {
881                            let children = new.children(*id);
882                            format!("{id}:{}{}", new.get(*id), if children.is_empty() { "".to_string() } else { format!("({})", children.iter().join(",")) })
883                        }).join(", ")
884                    );
885                }
886            }
887            if stack.len() != 1 {
888                self.log.add_error(format!("normalize_var({}): error while normalizing the rules, {} remaining nodes instead of 1",
889                                           Symbol::NT(var).to_str(self.get_symbol_table()), stack.len()));
890                return;
891            }
892            if VERBOSE_CC { println!("Final stack id: {}", stack[0]); }
893            let root = stack.pop().unwrap();
894            new.set_root(root);
895            match grtree_cleanup(&mut new, None, false) {
896                None => {
897                    self.log.add_error(format!(
898                        "unexpected root of {} -> {} (should be &, |, or symbol)",
899                        Symbol::NT(var).to_str(self.get_symbol_table()),
900                        grtree_to_str(&new, None, None, Some(var), self.get_symbol_table(), false)));
901                    if VERBOSE { println!("ERROR: unexpected root {}", grtree_to_str(&new, None, None, Some(var), self.get_symbol_table(), false)); }
902                }
903                // (is_empty, had_empty_term)
904                Some((true, _)) => {
905                    self.log.add_warning(format!("{} is empty", Symbol::NT(var).to_str(self.get_symbol_table())));
906                }
907                _ => {}
908            }
909            let orig_root = orig_new.add_from_tree_callback(None, &new, None, |from, to, _| self.origin.add((var, to), (var, from)));
910            orig_new.set_root(orig_root);
911            while !orig_rep_vars.is_empty() {
912                // We must replace new nonterminals with their original (though normalized) +* content, but we can't
913                // update `orig_new` while we're iterating it, so we proceed in two steps:
914                // - iterate in `orig_new`, locate the new +* nonterminals and put the node indices in orig_rep_nodes
915                // - iterate orig_rep_nodes and modify nodes in orig_new
916                // Since each replacement can make new nonterminals visible (if they're embedded in one another),
917                // we must repeat those steps until all `orig_rep_vars` have been found and replaced.
918                let mut orig_rep_nodes = Vec::<(usize, usize)>::new();
919                let mut to_remove = Vec::<VarId>::new();
920                for node in orig_new.iter_depth() {
921                    if let GrNode::Symbol(Symbol::NT(rep_var)) = node.deref() {
922                        if let Some(&orig_rep_id) = orig_rep_vars.get(&rep_var) {
923                            to_remove.push(*rep_var);
924                            orig_rep_nodes.push((node.index, orig_rep_id));
925                            self.origin.add((*rep_var, self.get_tree(*rep_var).unwrap().get_root().unwrap()), (var, orig_rep_id));
926                        }
927                    }
928                }
929                for (orig_id, child_id) in orig_rep_nodes {
930                    *orig_new.get_mut(orig_id) = gnode!(inst);
931                    orig_new.attach_child(orig_id, child_id);
932                }
933                for var in to_remove {
934                    orig_rep_vars.remove(&var);
935                }
936            }
937            self.origin.set_tree(var, orig_new);
938            self.set_tree(var, new);
939        }
940    }
941
942    fn normalize_plus_or_star(
943        &mut self, rep_child: usize, orig_rep: usize, orig_rep_child: usize,
944        new: &mut GrTree, orig_new: &mut GrTree, var: VarId, new_var: &mut VarId, is_plus: bool,
945        exclude_nt: &mut HashSet<VarId>
946    ) -> (usize, VarId)
947    {
948        const VERBOSE: bool = false;
949        const OPTIMIZE_SUB_OR: bool = false;
950        self.symbol_table.as_ref().map(|st| assert_eq!(st.get_num_nt(), self.trees.len(), "number of nt in symbol table doesn't match num_nt"));
951        let (mut qvar, mut rvar) = (*new_var, *new_var + 1);
952        let mut qtree = GrTree::new();
953        let mut rtree = GrTree::new();
954        let mut use_rtree = false;
955
956        // finds possible occurences of <L=var>, detects conflicts, and updates qvar/rvar if necessary
957        let mut lform_nt = None;
958        for node in orig_new.iter_depth_at(orig_rep_child) {
959            if let GrNode::LForm(v) = *node {
960                if matches!(lform_nt, Some(v2) if v != v2) {
961                    let symtab = self.get_symbol_table();
962                    self.log.add_error(
963                        format!("in {}, {}: conflicting <L={}> and <L={}>",
964                                Symbol::NT(var).to_str(symtab),
965                                grtree_to_str(orig_new, Some(orig_rep), None, Some(var), symtab, false),
966                                Symbol::NT(lform_nt.unwrap()).to_str(symtab), Symbol::NT(v).to_str(symtab)));
967                } else {
968                    lform_nt = Some(v);
969                    (qvar, rvar) = (v, *new_var);
970                }
971            }
972        }
973
974        // See comments in `normalize_var` near the calls to this method for details about the operations below.
975        // We copy from the origin tree `orig_new` to trace the new node IDs to the original ones.
976        match orig_new.get(orig_rep_child) {
977            GrNode::Symbol(s) => {
978                if VERBOSE { print!("({rep_child}:{s}) "); }
979                // note: we cannot use the child id in qtree!
980                let or = qtree.add_root(gnode!(|));
981                let cc = qtree.add(Some(or), gnode!(&));
982                let child = qtree.add(Some(cc), GrNode::Symbol(s.clone()));
983                qtree.add(Some(cc), gnode!(nt qvar));
984                let child2 = qtree.add(Some(or), if is_plus { GrNode::Symbol(s.clone()) } else { gnode!(e) });
985                self.origin.add((qvar, child), (var, orig_rep_child));     // useful?
986                self.origin.add((qvar, cc), (var, orig_rep_child));
987                if is_plus {
988                    self.origin.add((qvar, child2), (var, orig_rep_child));
989                }
990            }
991            GrNode::Concat => {
992                let id_grchildren = new.children(rep_child);
993                if VERBOSE { print!("({rep_child}:&({})) ", id_grchildren.iter().join(", ")); }
994                let or = qtree.add_root(gnode!(|));
995                let cc1 = qtree.add_from_tree_callback(Some(or), orig_new, Some(orig_rep_child), |to, from, _n| {
996                    self.origin.add((qvar, to), (var, from))
997                });
998                let loop_id = qtree.add(Some(cc1), gnode!(nt qvar));
999                self.origin.add((qvar, loop_id), (var, orig_rep));
1000                if is_plus {
1001                    let loop_id2 = qtree.add_from_tree(Some(or), &new, Some(rep_child));
1002                    self.origin.add((qvar, loop_id2), (var, orig_rep_child));
1003                } else {
1004                    qtree.add(Some(or), gnode!(e));
1005                }
1006            }
1007            #[allow(unreachable_patterns)]
1008            GrNode::Or => if !OPTIMIZE_SUB_OR {
1009                let id_grchildren = new.children(rep_child);
1010                if VERBOSE { print!("({rep_child}:|({})) ", id_grchildren.iter().join(", ")); }
1011                let orig_id_grchildren = orig_new.children(orig_rep_child);
1012                let or = qtree.add_root(gnode!(|));
1013                for orig_id_grchild in orig_id_grchildren {
1014                    let orig_grchild = orig_new.get(*orig_id_grchild);
1015                    match orig_grchild {
1016                        GrNode::Symbol(s) => {
1017                            let cc = qtree.add(Some(or), gnode!(&));
1018                            let child = qtree.add_iter(Some(cc), [GrNode::Symbol(s.clone()), gnode!(nt qvar)])[0];
1019                            self.origin.add((qvar, cc), (var, *orig_id_grchild));
1020                            self.origin.add((qvar, child), (var, *orig_id_grchild));
1021                            if is_plus {
1022                                let plus_or = qtree.add(Some(or), GrNode::Symbol(s.clone()));
1023                                self.origin.add((qvar, plus_or), (var, *orig_id_grchild));
1024                            }
1025                        }
1026                        GrNode::Concat => {
1027                            let cc = qtree.add_from_tree_callback(Some(or), orig_new, Some(*orig_id_grchild), |to, from, _n| {
1028                                self.origin.add((qvar, to), (var, from));
1029                            });
1030                            qtree.add(Some(cc), gnode!(nt qvar));
1031                            if is_plus {
1032                                qtree.add_from_tree_callback(Some(or), &orig_new, Some(*orig_id_grchild), |to, from, _| {
1033                                    self.origin.add((qvar, to), (var, from));
1034                                });
1035                            }
1036                        }
1037                        x => panic!("unexpected node type under a | node: {x}"),
1038                    }
1039                }
1040                if !is_plus {
1041                    qtree.add(Some(or), gnode!(e));
1042                }
1043            }
1044            // TODO: remove this optimization?
1045            #[allow(unreachable_patterns)]
1046            GrNode::Or => if OPTIMIZE_SUB_OR {
1047                // P -> αβ*γ becomes P -> αQγ         P -> α(β)+γ becomes P -> αQγ
1048                //                   Q -> βQ | ε                          Q -> βR
1049                // β can be β1|β2|..., which is distributed               R -> Q | ε
1050                //
1051                // self             new(Q=new_var)                   simpler format
1052                // ---------------------------------------------------------------------------------
1053                // *(|(&(A,B),C))   |(&(A,B,Q),&(C,Q'),ε)            Q -> ABQ|CQ | ε
1054                // +(|(&(A,B),C))   |(&(A,B,Q),&(C,Q'),&(A',B'),C')  Q -> ABQ|CQ | AB|C
1055                //
1056                // new method:
1057                // *(|(&(A,B),C))   |(&(A,B,Q),&(C,Q'),ε)            Q -> ABQ|CQ | ε
1058                // +(|(&(A,B),C))   |(&(A,B,Q),&(C,Q'))              Q -> ABR|CR         R -> Q | ε
1059                let id_grchildren = new.children(rep_child);
1060                if VERBOSE { print!("({rep_child}:|({})) ", id_grchildren.iter().join(", ")); }
1061                let orig_id_grchildren = orig_new.children(orig_rep_child);
1062                let or = qtree.add_root(gnode!(|));
1063                for orig_id_grchild in orig_id_grchildren {
1064                    let orig_grchild = new.get(*orig_id_grchild);
1065                    match orig_grchild {
1066                        GrNode::Symbol(s) => {
1067                            if is_plus {
1068                                qtree.addc_iter(Some(or), gnode!(&), [GrNode::Symbol(s.clone()), gnode!(nt rvar)]);
1069                                use_rtree = true;
1070                            } else {
1071                                qtree.addc_iter(Some(or), gnode!(&), [GrNode::Symbol(s.clone()), gnode!(nt qvar)]);
1072                            }
1073                        }
1074                        GrNode::Concat => {
1075                            let cc = qtree.add_from_tree_callback(Some(or), orig_new, Some(*orig_id_grchild), |to, from, _n| {
1076                                self.origin.add((qvar, to), (var, from));
1077                            });
1078                            if is_plus {
1079                                qtree.add(Some(cc), gnode!(nt rvar));
1080                            } else {
1081                                qtree.add(Some(cc), gnode!(nt qvar));
1082                            }
1083                        }
1084                        x => panic!("unexpected node type under a | node: {x}"),
1085                    }
1086                }
1087                if use_rtree {
1088                    let or1 = rtree.add_root(gnode!(|));
1089                    rtree.add_iter(Some(or1), [gnode!(nt qvar), gnode!(e)]);
1090                } else if !is_plus {
1091                    qtree.add(Some(or), gnode!(e));
1092                }
1093            }
1094            _ => panic!("Unexpected node type under a + node: {}", new.get(rep_child))
1095        }
1096        if let Some(v) = lform_nt {
1097            // `new_var` replaces `v`
1098            if v == var {
1099                self.log.add_error(
1100                    format!("in {}, {}: <L> points to the same nonterminal. It must be a new one, created for the loop.",
1101                            Symbol::NT(var).to_str(self.get_symbol_table()),
1102                            grtree_to_str(orig_new, Some(orig_rep), None, Some(var), self.get_symbol_table(), false)));
1103            } else {
1104                // self.nt_conversion.insert(v, MovedTo(qvar));
1105                for mut node in qtree.iter_depth_simple_mut() {
1106                    if let GrNode::LForm(v2) = node.deref_mut() {
1107                        if *v2 == v { *v2 = qvar; }
1108                    }
1109                }
1110                for mut node in orig_new.iter_depth_simple_at_mut(orig_rep_child) {
1111                    if let GrNode::LForm(v2) = node.deref_mut() {
1112                        if *v2 == v { *v2 = qvar; }
1113                    }
1114                }
1115            }
1116        }
1117        self.symbol_table.as_mut().map(|st| {
1118            if let Some(_v) = lform_nt {
1119                // let name = st.remove_nt_name(v);
1120                // if VERBOSE {
1121                //     println!("L-FORM({v}) found, using name of NT({v}) = '{name}' for new NT({new_var})");
1122                // }
1123                // assert_eq!(st.add_nonterminal(name), qvar);
1124            } else {
1125                assert_eq!(st.add_child_nonterminal(var), qvar);
1126            }
1127            if use_rtree {
1128                assert_eq!(st.add_child_nonterminal(var), rvar);
1129            }
1130        });
1131        let id = new.add(None, gnode!(nt qvar));
1132        assert!(qvar as usize >= self.trees.len() || self.trees[qvar as usize].is_empty(), "overwriting tree {new_var}");
1133        if VERBOSE { println!("qtree: NT[{qvar}] {} -> {}", Symbol::NT(qvar).to_str(self.get_symbol_table()), grtree_to_str(&qtree, None, None, Some(qvar), self.get_symbol_table(), false) /*qtree.to_str(None, self.get_symbol_table())*/); }
1134        self.set_tree(qvar, qtree);
1135        exclude_nt.insert(qvar);
1136        self.flags.resize(rvar as usize, 0);
1137        self.parent.resize(rvar as usize, None);
1138        let plus_flag = if is_plus { ruleflag::REPEAT_PLUS } else { 0 };
1139        self.flags[qvar as usize] = ruleflag::CHILD_REPEAT | plus_flag;
1140        self.flags[var as usize] |= ruleflag::PARENT_REPEAT | plus_flag;
1141        self.parent[qvar as usize] = Some(var);
1142        if use_rtree {
1143            if VERBOSE { println!("rtree: NT[{rvar}] {} -> {}", Symbol::NT(rvar).to_str(self.get_symbol_table()), grtree_to_str(&rtree, None, None, Some(rvar), self.get_symbol_table(), false)); }
1144            self.set_tree(rvar, rtree);
1145            exclude_nt.insert(var);
1146            self.flags.resize(rvar as usize + 1, 0);
1147            self.parent.resize(rvar as usize + 1, None);
1148            self.flags[rvar as usize] |= ruleflag::CHILD_L_FACT;
1149            self.parent[rvar as usize] = Some(qvar);
1150            self.flags[qvar as usize] |= ruleflag::PARENT_L_FACTOR;
1151        }
1152        if VERBOSE {
1153            println!("=> new sizes, flags = {}, parent = {}, trees = {} (new_var = {new_var})", self.flags.len(), self.parent.len(), self.trees.len());
1154            println!("=> {}: parent {}, child {}{}",
1155                     if is_plus { "+" } else { "*" },
1156                     Symbol::NT(var).to_str(self.get_symbol_table()),
1157                     Symbol::NT(qvar).to_str(self.get_symbol_table()),
1158                     if use_rtree { format!(", child {}", Symbol::NT(rvar).to_str(self.get_symbol_table())) } else { String::new() }
1159            );
1160        }
1161        // We rectify the parent/child relationship in case of cascaded + or *. Since we perform a
1162        // bottom-up reconstruction, a rule like A -> ( ( a )+ b )+ will yield
1163        //   A -> A_2
1164        //   A_1 -> a A_1 | ε       parent: A
1165        //   A_2 -> A_1 b A_2 | ε   parent: A
1166        // We want A_1's parent to be A_2. We keep the wrong order in the parent chain: A_1 -> A_2 -> A,
1167        // which is unfortunate in some later tests but still easier than changing everything here.
1168        let mut rectify_maybe = None;
1169        for node in self.get_tree(qvar).unwrap().iter_depth_simple() {
1170            if let GrNode::Symbol(Symbol::NT(child)) = node.deref() {
1171                if *child != qvar && self.flags[*child as usize] & ruleflag::CHILD_REPEAT != 0 {
1172                    rectify_maybe = Some(*child);
1173                    break;
1174                }
1175            }
1176        }
1177        if let Some(child) = rectify_maybe {
1178            self.parent[child as usize] = Some(qvar);
1179            self.flags[qvar as usize] |= ruleflag::PARENT_REPEAT;
1180            if VERBOSE {
1181                println!("=> rectify {}'s parent as {}",
1182                         Symbol::NT(child).to_str(self.get_symbol_table()),
1183                         Symbol::NT(qvar).to_str(self.get_symbol_table()));
1184            }
1185        }
1186        *new_var = self.get_next_available_var();
1187        (id, qvar)
1188    }
1189}
1190
1191impl BuildFrom<RuleTreeSet<General>> for RuleTreeSet<Normalized> {
1192    /// Transforms a `General` ruleset to a `Normalized` ruleset
1193    ///
1194    /// If an error is encountered or was already encountered before, an empty shell object
1195    /// is built with the log detailing the error(s).
1196    fn build_from(mut rules: RuleTreeSet<General>) -> Self {
1197        // We handle the errors by transmitting the log to the next construct rather than returning a `Result` type.
1198        // This allows to cascade the transforms without getting a complicated error resolving system while preserving
1199        // the information about the errors easily.
1200        if rules.log.has_no_errors() {
1201            rules.normalize();
1202        }
1203        RuleTreeSet::<Normalized> {
1204            trees: rules.trees,
1205            start: rules.start,
1206            symbol_table: rules.symbol_table,
1207            flags: rules.flags,
1208            parent: rules.parent,
1209            nt_conversion: rules.nt_conversion,
1210            origin: rules.origin,
1211            log: rules.log,
1212            _phantom: PhantomData
1213        }
1214    }
1215}
1216
1217// impl BuildFrom<RuleTreeSet<Normalized>> for RuleTreeSet<General> {
1218//     /// Transforms a `Normalized` ruleset to a `General` ruleset
1219//     fn build_from(mut rules: RuleTreeSet<Normalized>) -> Self {
1220//         RuleTreeSet::<General> { trees: rules.trees, next_var: rules.next_var, _phantom: PhantomData }
1221//     }
1222// }
1223
1224// ---------------------------------------------------------------------------------------------
1225// Macros
1226
1227pub mod macros {
1228    /// Generates a `GrNode` instance.
1229    ///
1230    /// # Examples
1231    /// ```
1232    /// # use lexigram_lib::{TokenId, VarId, gnode, parser::Symbol};
1233    /// # use lexigram_lib::grammar::GrNode;
1234    /// assert_eq!(gnode!([1]), GrNode::Symbol(Symbol::T(1 as TokenId)));
1235    /// assert_eq!(gnode!(t 2), GrNode::Symbol(Symbol::T(2 as TokenId)));
1236    /// assert_eq!(gnode!(nt 3), GrNode::Symbol(Symbol::NT(3 as VarId)));
1237    /// assert_eq!(gnode!(e), GrNode::Symbol(Symbol::Empty));
1238    /// assert_eq!(gnode!(end), GrNode::Symbol(Symbol::End));
1239    /// assert_eq!(gnode!(&), GrNode::Concat);
1240    /// assert_eq!(gnode!(|), GrNode::Or);
1241    /// assert_eq!(gnode!(?), GrNode::Maybe);
1242    /// assert_eq!(gnode!(+), GrNode::Plus);
1243    /// assert_eq!(gnode!(*), GrNode::Star);
1244    /// assert_eq!(gnode!(L 3), GrNode::LForm(3));
1245    /// assert_eq!(gnode!(R), GrNode::RAssoc);
1246    /// ```
1247    #[macro_export]
1248    macro_rules! gnode {
1249        ([$id:expr]) => { gnode!(t $id) };
1250        (t $id:expr) => { $crate::grammar::GrNode::Symbol($crate::parser::Symbol::T($id as $crate::TokenId)) };
1251        (nt $id:expr) => { $crate::grammar::GrNode::Symbol($crate::parser::Symbol::NT($id as $crate::VarId)) };
1252        (e) => { $crate::grammar::GrNode::Symbol($crate::parser::Symbol::Empty) };
1253        (end) => { $crate::grammar::GrNode::Symbol($crate::parser::Symbol::End) };
1254        //
1255        (&) => { $crate::grammar::GrNode::Concat };
1256        (|) => { $crate::grammar::GrNode::Or };
1257        (?) => { $crate::grammar::GrNode::Maybe };
1258        (+) => { $crate::grammar::GrNode::Plus };
1259        (*) => { $crate::grammar::GrNode::Star };
1260        (L $id:expr) => { $crate::grammar::GrNode::LForm($id) };
1261        (R) => { $crate::grammar::GrNode::RAssoc };
1262        (P) => { $crate::grammar::GrNode::PrecEq };
1263        (inst) => { $crate::grammar::GrNode::Instance };
1264    }
1265
1266    /// Generates a `Symbol` instance.
1267    ///
1268    /// # Examples
1269    /// ```
1270    /// # use lexigram_lib::{TokenId, VarId, sym};
1271    /// # use lexigram_lib::parser::Symbol;
1272    /// assert_eq!(sym!(t 2), Symbol::T(2 as TokenId));
1273    /// assert_eq!(sym!(nt 3), Symbol::NT(3 as VarId));
1274    /// assert_eq!(sym!(e), Symbol::Empty);
1275    /// assert_eq!(sym!(end), Symbol::End);
1276    #[macro_export]
1277    macro_rules! sym {
1278        (t $id:expr) => { $crate::parser::Symbol::T($id as $crate::TokenId) };
1279        (nt $id:expr) => { $crate::parser::Symbol::NT($id as $crate::VarId) };
1280        (e) => { $crate::parser::Symbol::Empty };
1281        (end) => { $crate::parser::Symbol::End };
1282    }
1283
1284    #[macro_export]
1285    macro_rules! altflag {
1286        (L) => { $crate::alt::ruleflag::L_FORM };
1287        (R) => { $crate::alt::ruleflag::R_ASSOC };
1288        (G) => { $crate::alt::ruleflag::GREEDY };
1289        (P) => { $crate::alt::ruleflag::PREC_EQ };
1290        ($f:expr) => { $f };
1291    }
1292
1293    /// Generates a production rule alternative. An alternative is made up of symbols separated by a comma.
1294    /// Each symbol is either
1295    /// - a non-terminal: `nt` {integer}
1296    /// - a terminal: `t` {integer}
1297    /// - the empty symbol: `e`
1298    ///
1299    /// Preceding an alternative with `# {integer}` sets a flag value on that alternative. The values are:
1300    /// - 128: L-form (low-latency parsing of that alternative)
1301    /// - 256: R-assoc (right-associative - by default, ambiguous alternatives like 'E * E' are left-associative)
1302    ///
1303    /// # Example
1304    /// ```
1305    /// # use lexigram_core::TokenId;
1306    /// # use lexigram_core::alt::Alternative;
1307    /// # use lexigram_lib::{alt, sym};
1308    /// assert_eq!(alt!(nt 1, t 2, e), Alternative::new(vec![sym!(nt 1), sym!(t 2), sym!(e)]));
1309    /// assert_eq!(alt!(#128, nt 1, t 2, e), Alternative::new(vec![sym!(nt 1), sym!(t 2), sym!(e)]).with_flags(128));
1310    /// assert_eq!(alt!(#L, nt 1, t 2, e), Alternative::new(vec![sym!(nt 1), sym!(t 2), sym!(e)]).with_flags(128));
1311    /// let x = 256;
1312    /// let o_id = 4;
1313    /// assert_eq!(alt!(#(x, o_id), nt 0, t 1, e), Alternative::new(vec![sym!(nt 0), sym!(t 1), sym!(e)]).with_flags(256).with_ambig_alt_id(4));
1314    /// ```
1315    #[macro_export]
1316    macro_rules! alt {
1317        () => { $crate::alt::Alternative::new(std::vec![]) };
1318        ($($a:ident $($b:expr)?,)+) => { alt!($($a $($b)?),+) };
1319        ($($a:ident $($b:expr)?),*) => { $crate::alt::Alternative::new(std::vec![$($crate::sym!($a $($b)?)),*]) };
1320        (#$f:literal, $($a:ident $($b:expr)?,)+) => { alt!(#$f, $($a $($b)?),+) };
1321        (#$f:literal, $($a:ident $($b:expr)?),*) => { $crate::alt::Alternative::new(std::vec![$($crate::sym!($a $($b)?)),*]).with_flags($f) };
1322        // (#$f:ident, $(%($v:expr, $id:expr),)? $($a:ident $($b:expr)?,)+) => { alt!(#$f, $(%($v, $id),)? $($a $($b)?),+) };
1323        // (#$f:ident, $(%($v:expr, $id:expr),)? $($a:ident $($b:expr)?),*)
1324        //     => {$crate::alt::Alternative::new(std::vec![$($crate::sym!($a $($b)?)),*]).with_flags($crate::altflag!($f))$(.with_orig($v, $id))? };
1325        ($(#$f:ident,)? $(%($v:expr, $id:expr),)? $($a:ident $($b:expr)?,)+) => { alt!($(#$f,)? $(%($v, $id),)? $($a $($b)?),+) };
1326        ($(#$f:ident,)? $(%($v:expr, $id:expr),)? $($a:ident $($b:expr)?),*)
1327            => { $crate::alt::Alternative::new(std::vec![$($crate::sym!($a $($b)?)),*])$(.with_flags($crate::altflag!($f)))?$(.with_origin($v, $id))? };
1328        // TODO: change "#" parts below
1329        (#($f:expr, $o:expr), $(%($v:expr, $id:expr),)? $($a:ident $($b:expr)?,)+)
1330            => { alt!(#($f, $o), $(%($v, $id),)? $($a $($b)?),+) };
1331        (#($f:expr, $o:expr), $(%($v:expr, $id:expr),)? $($a:ident $($b:expr)?),*)
1332            => { $crate::alt::Alternative::new(std::vec![$($crate::sym!($a $($b)?)),*]).with_flags($crate::altflag!($f)).with_ambig_alt_id($o)$(.with_origin($v, $id))? };
1333        (%($v:expr, $id:expr), $($a:ident $($b:expr)?,)+)
1334            => { alt!(%($v, $id), $($a $($b)?),+) };
1335        (%($v:expr, $id:expr), $($a:ident $($b:expr)?),*)
1336            => { $crate::alt::Alternative::new(std::vec![$($crate::sym!($a $($b)?)),*]).with_flags($crate::altflag!($f)).with_origin($v, $id) };
1337    }
1338
1339    #[macro_export]
1340    macro_rules! symbols {
1341        () => { std::vec![] };
1342        ($($a:ident $($b:literal $(: $num:expr)?)?,)+) => { symbols![$($a $($b $(: $num)?)?),+] };
1343        ($($a:ident $($b:literal $(: $num:expr)?)?),*) => { std::vec![$($crate::sym!($a $($b $(: $num)?)?)),*] };
1344    }
1345
1346    /// Generates a production rule. It is made up of alternatives separated by a semicolon.
1347    ///
1348    /// Example
1349    /// ```
1350    /// # use lexigram_lib::{TokenId, VarId, parser::Symbol};
1351    /// # use lexigram_core::alt::Alternative;
1352    /// # use lexigram_lib::{prule, alt, sym};
1353    /// assert_eq!(prule!(nt 1, t 2, nt 1, t 3; nt 2; e),
1354    ///            vec![Alternative::new(vec![sym!(nt 1), sym!(t 2), sym!(nt 1), sym!(t 3)]),
1355    ///                 Alternative::new(vec![sym!(nt  2)]),
1356    ///                 Alternative::new(vec![sym!(e)])]);
1357    /// assert_eq!(prule!(nt 1, t 2, nt 1, t 3; #128, nt 2; e),
1358    ///            vec![Alternative::new(vec![sym!(nt 1), sym!(t 2), sym!(nt 1), sym!(t 3)]),
1359    ///                 Alternative::new(vec![sym!(nt  2)]).with_flags(128),
1360    ///                 Alternative::new(vec![sym!(e)])]);
1361    /// assert_eq!(prule!(nt 1, t 2, nt 1, t 3; #L, nt 2; e),
1362    ///            vec![Alternative::new(vec![sym!(nt 1), sym!(t 2), sym!(nt 1), sym!(t 3)]),
1363    ///                 Alternative::new(vec![sym!(nt  2)]).with_flags(128),
1364    ///                 Alternative::new(vec![sym!(e)])]);
1365    /// ```
1366    #[macro_export]
1367    macro_rules! prule {
1368        () => { std::vec![] };
1369        ($($(#$f:literal,)? $($a:ident $($b:expr)?),*;)+) => { prule![$($(#$f,)? $($a $($b)?),+);+] };
1370        ($($(#$f:literal,)? $($a:ident $($b:expr)?),*);*) => { std::vec![$($crate::alt![$(#$f,)? $($a $($b)?),+]),*]};
1371        ($($(#$f:ident,)? $(%($v:expr, $id:expr),)? $($a:ident $($b:expr)?),*;)+) => { prule![$($(#$f,)? $(%($v, $id),)? $($a $($b)?),+);+] };
1372        ($($(#$f:ident,)? $(%($v:expr, $id:expr),)? $($a:ident $($b:expr)?),*);*) => { std::vec![$($crate::alt![$(#$f,)? $(%($v, $id),)? $($a $($b)?),+]),*]};
1373        // TODO: change "#" part below
1374        ($($(#($f:expr, $o:expr),)? $(%($v:expr, $id:expr),)? $($a:ident $($b:expr)?),*;)+) => { prule![$($(#($f, $o),)? $(%($v, $id),)? $($a $($b)?),+);+] };
1375        ($($(#($f:expr, $o:expr),)? $(%($v:expr, $id:expr),)? $($a:ident $($b:expr)?),*);*) => { std::vec![$($crate::alt![$(#($f,$o),)? $(%($v, $id),)? $($a $($b)?),+]),*]};
1376    }
1377}