Skip to main content

lexigram_lib/grammar/
mod.rs

1// Copyright (c) 2025 Redglyph (@gmail.com). All Rights Reserved.
2
3pub mod tests;
4pub mod origin;
5mod prs;
6
7pub use prs::*;
8
9use std::collections::{HashMap, HashSet};
10use std::fmt::{Display, Formatter};
11use std::marker::PhantomData;
12use std::mem::take;
13use std::ops::{Deref, DerefMut};
14use iter_index::IndexerIterator;
15use vectree::VecTree;
16use lexigram_core::alt::ruleflag;
17use lexigram_core::CollectJoin;
18use crate::cproduct::CProduct;
19use crate::{alt, gnode, hashset, indent_source, prule, sym, vaddi, General, Normalized, TokenId, VarId, LL1, LR};
20use crate::fixed_sym_table::SymInfoTable;
21use crate::grammar::NTConversion::{MovedTo, Removed};
22use crate::grammar::origin::{FromPRS, FromRTS, Origin};
23use lexigram_core::log::{BufLog, LogReader, LogStatus, Logger};
24use crate::build::{BuildErrorSource, BuildFrom, HasBuildErrorSource};
25use crate::parser::Symbol;
26use crate::SymbolTable;
27
28#[derive(Clone, Copy, PartialEq, Debug)]
29pub enum GrNode {
30    Symbol(Symbol),
31    Concat,
32    Or,
33    Maybe,
34    Plus,
35    Star,
36    /// L-form attribute of an alternative or a `+` / `*` repetition expression.
37    /// - `+` and `*` expressions are either folded or iterative (also called "low latency", since the listener is
38    ///   called back immediately after parsing each item, whereas the folded form is only called once all the
39    ///   items have been parsed and gathered).
40    ///   - The default form is folded. With that form, the items of the repetitions are automatically gathered and handed
41    ///     to the listener callback as an array (if the items have a value) once all items have been parsed. For example,
42    ///     `A -> a (b)* c` gives a context with the values of a, b, c as variant
43    ///     `enum CtxA { A { a: String, b: Vec<String>, c: String } }`.
44    ///   - If the L-form is specified, the listener callback is called at each iteration, with a context giving the parsed
45    ///     items of that iteration which have a value. The NT used in that loop is defined with the L-form (`LForm(VarId)`),
46    ///     and its value serves as accumulator to fold all the successive items into a single value presented in the context
47    ///     of the alternative that includes the `+` or `*` repetition. For example, `A -> a (<L=AIter> b)* c` uses `AIter`,
48    ///     and each time a `b` value is parsed, the listener callback receives a context variant
49    ///     `enum CtxAIter { AIter1 { iter: SynAIter, b: String } }`. The callback must return the new `SynAIter` value.
50    ///     Once all the iterations are parsed, `c` is parsed, and the listener callback receives the context for `A`:
51    ///     `CtxA { A { a: String, star: SynAIter, c: String } }`.
52    /// - Right-recursive rules are either stacked or "low-latency".
53    ///   - The default form is stacked. A rule `A -> id A | stop` parsing "id1 id2 id3 stop1" yields a sequence
54    ///     - `A -> id1 A(1)`, `A(1) -> id2 A(2)`, `A(2) -> id3 A(3)`, `A(3) -> stop1`
55    ///
56    ///     Since it's recursive, the listener callback is first called for A(3), then A(2), A(1), and finally A. The parser
57    ///     puts the intermediate values of `id` on the stack, and once `stop1` is reached, it calls the callback with it,
58    ///     then unstacks all the `id` values for the successive callbacks with `id = id3`, `id2`, and finally `id1`,
59    ///     together with the loop value `A`, which is updated each time by the callback.
60    ///   - The low-latency form, whose `VarId` points to its own NT, calls the listener callback at each iteration and
61    ///     doesn't accumulate values on the stack. In the example above, it's first called with `id = id1`, `id2`, `id3`,
62    ///     and finally `stop = stop1`, together with the loop value `A`, which is updated each time by the callback.
63    LForm(VarId),   // applied to NT
64    RAssoc,         // applied to alternative, right-associative
65    PrecEq,         // applied to alternative, same precedence as previous alternative
66    Instance,       // instance of * or + in reference origin trees
67    Greedy,         // alternative is greedy in parsing table
68}
69
70impl Display for GrNode {
71    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
72        match self {
73            GrNode::Symbol(s) => write!(f, "{s}"),
74            GrNode::Concat => write!(f, "&"),
75            GrNode::Or => write!(f, "|"),
76            GrNode::Maybe => write!(f, "?"),
77            GrNode::Plus => write!(f, "+"),
78            GrNode::Star => write!(f, "*"),
79            GrNode::LForm(v) => write!(f, "<L={v}>"),
80            GrNode::RAssoc => write!(f, "<R>"),
81            GrNode::PrecEq => write!(f, "<P>"),
82            GrNode::Instance => write!(f, "inst "),
83            GrNode::Greedy => write!(f, "<G>"),
84        }
85    }
86}
87
88impl GrNode {
89    pub fn to_str(&self, symbol_table: Option<&SymbolTable>) -> String {
90        match self {
91            GrNode::Symbol(s) => symbol_table.map(|t| t.get_str(s)).unwrap_or(s.to_string()),
92            GrNode::LForm(v) => format!("<L={}>", symbol_table.map(|t| t.get_str(&Symbol::NT(*v))).unwrap_or(v.to_string())),
93            _ => self.to_string()
94        }
95    }
96
97    pub fn gen_source_code(&self) -> String {
98        match self {
99            GrNode::Symbol(s) => format!("gnode!({})", s.to_macro_item()),
100            GrNode::Concat    => "gnode!(&)".to_string(),
101            GrNode::Or        => "gnode!(|)".to_string(),
102            GrNode::Maybe     => "gnode!(?)".to_string(),
103            GrNode::Plus      => "gnode!(+)".to_string(),
104            GrNode::Star      => "gnode!(*)".to_string(),
105            GrNode::LForm(v)  => format!("gnode!(L {v})"),
106            GrNode::RAssoc    => "gnode!(R)".to_string(),
107            GrNode::PrecEq    => "gnode!(P)".to_string(),
108            GrNode::Instance  => "gnode!(inst)".to_string(),
109            GrNode::Greedy    => "gnode!(G)".to_string(),
110        }
111    }
112
113    pub fn is_modifier(&self) -> bool {
114        matches!(self, GrNode::LForm(_) | GrNode::RAssoc | GrNode::PrecEq | GrNode::Greedy)
115    }
116
117    pub fn is_empty(&self) -> bool {
118        matches!(self, GrNode::Symbol(Symbol::Empty))
119    }
120}
121
122// ---------------------------------------------------------------------------------------------
123
124/// Simple index object that returns `Original(<value>)` on the first `index.get()`, then
125/// `Copy(<value>)` on subsequent calls. The indices are stored on 31 bits, keeping one bit
126/// for the 'original' flag. Trying to store larger values triggers a panic.
127#[derive(Clone, Copy)]
128pub struct Dup {
129    index: u32
130}
131
132#[derive(Clone, Copy, Debug)]
133enum DupVal {
134    Original(u32),
135    Copy(u32)
136}
137
138impl Dup {
139    const MASK: u32 = 1 << (u32::BITS - 1);
140
141    fn new(index: usize) -> Self {
142        assert!(index < Self::MASK as usize);
143        Self { index: index as u32 }
144    }
145
146    fn get(&mut self) -> DupVal {
147        let idx = self.index;
148        if idx & Self::MASK == 0 {
149            self.index |= Self::MASK;
150            DupVal::Original(idx)
151        } else {
152            DupVal::Copy(idx & !Self::MASK)
153        }
154    }
155}
156
157impl std::fmt::Debug for Dup {
158    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
159        write!(f, "Dup{{")?;
160        if self.index & Self::MASK == 0 {
161            write!(f, "{}}}", self.index)
162        } else {
163            write!(f, "copy {}}}", self.index & !Dup::MASK)
164        }
165    }
166}
167
168// ---------------------------------------------------------------------------------------------
169
170pub type GrTree = VecTree<GrNode>;
171
172pub(crate) const STR_BEFORE: &str = " ►► ";
173pub(crate) const STR_AFTER: &str = " ◄◄ ";
174pub(crate) const STR_BEFORE_ANSI: &str = "\u{1b}[4;1;36m";
175pub(crate) const STR_AFTER_ANSI: &str = "\u{1b}[0m";
176
177/// Builds the string representation of the [`GrTree`], using the [`symbol table`](SymbolTable) if available
178/// and optionally starting at node `node`. If `emphasis` contains a node ID, this subpart of the tree
179/// is emphasized in the string.
180pub fn grtree_to_str_custom(tree: &GrTree, node: Option<usize>, emphasis: Option<usize>, nt: Option<VarId>, symbol_table: Option<&SymbolTable>, simplified: bool, ansi: bool)
181                            -> String
182{
183    fn pr_join(children: Vec<(u32, String)>, str: &str, pr: u32) -> (u32, String) {
184        (pr, children.into_iter()
185            .map(|(p_ch, ch)| if p_ch >= pr { ch } else { format!("({ch})") })
186            .join(str))
187    }
188
189    fn pr_append(child: (u32, String), str: &str, pr: u32) -> (u32, String) {
190        (pr, if child.0 >= pr { format!("{}{str}", child.1) } else { format!("({}){str}", child.1) })
191    }
192
193    const PR_PROD: u32 = 1;
194    const PR_TERM: u32 = 2;
195    const PR_FACTOR: u32 = 3;
196    const PR_ATOM: u32 = 4;
197
198    let mut children = vec![];
199    if tree.is_empty() {
200        return "<empty>".to_string();
201    }
202    let before = if ansi { STR_BEFORE_ANSI } else { STR_BEFORE };
203    let after = if ansi { STR_AFTER_ANSI } else { STR_AFTER };
204    let top = node.unwrap_or_else(|| tree.get_root().unwrap());
205    for node in tree.iter_post_depth_simple_at(top) {
206        let (pr, mut str) = match node.num_children() {
207            0 => {
208                match node.deref() {
209                    GrNode::Symbol(s) => (PR_ATOM, s.to_str_quote(symbol_table)),
210                    GrNode::LForm(var) => (PR_ATOM, if simplified || nt == Some(*var) { "<L>".to_string() } else { format!("<L={}>", Symbol::NT(*var).to_str(symbol_table)) }),
211                    GrNode::RAssoc => (PR_ATOM, "<R>".to_string()),
212                    GrNode::PrecEq => (PR_ATOM, "<P>".to_string()),
213                    GrNode::Greedy => (PR_ATOM, "<G>".to_string()),
214                    _ => (PR_ATOM, "##ERROR##".to_string()),
215                }
216            }
217            n => {
218                let mut node_children = children.split_off(children.len() - n);
219                match node.deref() {
220                    GrNode::Concat => pr_join(node_children, " ", PR_TERM),
221                    GrNode::Or => pr_join(node_children, " | ", PR_PROD),
222                    GrNode::Maybe => pr_append(node_children.pop().unwrap(), "?", PR_FACTOR),
223                    GrNode::Plus => pr_append(node_children.pop().unwrap(), "+", PR_FACTOR),
224                    GrNode::Star => pr_append(node_children.pop().unwrap(), "*", PR_FACTOR),
225                    GrNode::Instance => pr_join(node_children, " ", PR_FACTOR),
226                    _ => (PR_ATOM, "##ERROR##".to_string()),
227                }
228            }
229        };
230        if Some(node.index) == emphasis {
231            str = format!("{before}{str}{after}");
232        }
233        children.push((pr, str));
234    }
235    children.pop().unwrap().1
236}
237
238pub fn grtree_to_str(tree: &GrTree, node: Option<usize>, emphasis: Option<usize>, var: Option<VarId>, symbol_table: Option<&SymbolTable>, simplified: bool) -> String {
239    grtree_to_str_custom(tree, node, emphasis, var, symbol_table, simplified, false)
240}
241
242pub fn grtree_to_str_ansi(tree: &GrTree, node: Option<usize>, emphasis: Option<usize>, var: Option<VarId>, symbol_table: Option<&SymbolTable>, simplified: bool) -> String {
243    grtree_to_str_custom(tree, node, emphasis, var, symbol_table, simplified, true)
244}
245
246/// Cleans the empty symbols in a normalized tree. Removes empty terms if `del_empty_terms` is true.
247///
248/// Returns `Some((is_empty, had_empty_term))` for normalized trees, where
249/// * `is_empty` = true if only ε remains
250/// * `had_empty_term` = true if the tree had an empty term (was of the form `α | ε`)
251///
252/// If the top of the tree isn't a symbol, a `&`, or & `|`, the function doesn't process the tree
253/// and returns `None`. If something else than those 3 types of nodes is met inside the tree, it's
254/// simply ignored.
255///
256/// The modifiers `<L>`, `<R>`, or `<P>` alone(s) with `ε` in a term will be simplified, but not
257/// if there are other items in the term:
258///
259/// ```text
260/// del_empty_terms: true     false
261///                  ----     -----
262/// a | <L> ε   =>   a        a | ε
263/// a | <R>     =>   a        a | ε
264/// <P> ε a     =>   <P> a    <P> a
265/// ```
266fn grtree_cleanup(tree: &mut GrTree, top: Option<usize>, del_empty_term: bool) -> Option<(bool, bool)> {
267    const VERBOSE: bool = false;
268    let root = top.unwrap_or_else(|| tree.get_root().unwrap());
269    let mut had_empty_term = false;
270    let (terms, is_or) = match tree.get(root) {
271        GrNode::Symbol(s) => {
272            let is_empty = *s == Symbol::Empty;
273            return Some((is_empty, is_empty));
274        }
275        GrNode::Concat => (vec![root], false),
276        GrNode::Or => (tree.children(root).to_owned(), true),
277        // we don't handle those cases:
278        GrNode::Maybe | GrNode::Plus | GrNode::Star | GrNode::LForm(_)
279        | GrNode::RAssoc | GrNode::PrecEq | GrNode::Instance | GrNode::Greedy => { return None }
280    };
281    let terms_len = terms.len();
282    let mut empty_terms = vec![];
283    for (term_pos, term) in terms.into_iter().enumerate() {
284        match *tree.get(term) {
285            GrNode::Concat => {
286                let children = tree.children(term);
287                let len = children.len();
288                let mut empty_pos = vec![];
289                let n_modifiers = children.iter().enumerate()
290                    .fold(0, |n_mod, (pos, &index)| {
291                        let n = tree.get(index);
292                        if n.is_empty() {
293                            empty_pos.push(pos);
294                        }
295                        n_mod + if n.is_modifier() { 1 } else { 0 }
296                    });
297                if VERBOSE { print!("- term {}  => {empty_pos:?} empty, {n_modifiers} modifier", tree.to_str_index(Some(term), None)); }
298                if empty_pos.len() + n_modifiers == len {
299                    *tree.get_mut(term) = gnode!(e);
300                    tree.children_mut(term).clear();
301                    empty_terms.push(term_pos);
302                    if VERBOSE { println!(" (replacing everything with ε)"); }
303                } else if !empty_pos.is_empty() {
304                    if VERBOSE { println!("  => (removing {} ε)", empty_pos.len()); }
305                    let new_children = tree.children_mut(term);
306                    for i in empty_pos.into_iter().rev() {
307                        new_children.remove(i);
308                    }
309                } else if VERBOSE {
310                    println!(" (nothing to do)");
311                }
312            }
313            GrNode::Symbol(Symbol::Empty) => {
314                empty_terms.push(term_pos);
315            }
316            n if n.is_modifier() => {   // lone modifier
317                *tree.get_mut(term) = gnode!(e);
318                empty_terms.push(term_pos);
319            }
320            _ => {}
321        }
322    }
323    if VERBOSE { println!("  {} empty terms: {empty_terms:?}", empty_terms.len()); }
324    if !empty_terms.is_empty() {
325        had_empty_term = true;
326        if is_or {
327            if !del_empty_term && terms_len > 1 || empty_terms.len() == terms_len {
328                empty_terms.pop();
329            }
330            let or_children = tree.children_mut(root);
331            for i in empty_terms.into_iter().rev() {
332                or_children.remove(i);
333            }
334            if VERBOSE { println!("or_children => {or_children:?}"); }
335        }
336    }
337    if is_or && tree.children(root).len() == 1 {
338        // removes the top | if it has only one child (not entirely necessary but simplifies the rest)
339        let subroot_index = tree.children(root)[0];
340        let subroot = *tree.get(subroot_index);
341        let subroot_children = tree.children(subroot_index).to_owned();
342        *tree.get_mut(root) = subroot;
343        *tree.children_mut(root) = subroot_children;
344    }
345    let is_empty = match tree.get(root) {
346        GrNode::Symbol(s) => s.is_empty(),
347        GrNode::Concat => false, // an empty `&` is simplified to `ε` above // matches!(tree.children(root), &[i] if tree.get(i).is_empty()),
348        GrNode::Or => false, // an empty `|` is simplified to `ε` above
349        GrNode::Maybe | GrNode::Plus | GrNode::Star | GrNode::LForm(_) | GrNode::RAssoc | GrNode::PrecEq | GrNode::Instance | GrNode::Greedy => false,
350    };
351    Some((is_empty, had_empty_term))
352}
353
354/// Removes duplicate 'ε' symbols in `nodes` alts before adding them as children in a `&`.
355///
356/// ## Examples:
357/// * `ε ε` -> `ε`
358/// * `ε A` -> `A`
359/// * `ε` -> `ε`
360fn remove_concat_dup_empty(tree: &GrTree, nodes: &mut Vec<usize>) {
361    if nodes.len() > 1 {
362        let mut i = 0;
363        while i < nodes.len() && nodes.len() > 1 {
364            if tree.get(nodes[i]).is_empty() {
365                nodes.remove(i);
366            } else {
367                i += 1;
368            }
369        }
370    }
371}
372
373/// Adds methods to GrTree.
374///
375/// _NOTE: We must create a trait for GrTree since we can't implement functions for an external type,
376/// and a type alias is not considered as a new type._
377pub trait GrTreeExt {
378    fn get_dup(&mut self, dup_index: &mut Dup) -> usize;
379    fn to_str(&self, start_node: Option<usize>, symbol_table: Option<&SymbolTable>) -> String;
380    fn to_str_index(&self, start_node: Option<usize>, symbol_table: Option<&SymbolTable>) -> String;
381}
382
383impl GrTreeExt for GrTree {
384    fn get_dup(&mut self, dup_index: &mut Dup) -> usize {
385        match dup_index.get() {
386            DupVal::Original(index) => index as usize,
387            DupVal::Copy(index) => {
388                let node = *self.get(index as usize);
389                self.add(None, node)
390            }
391        }
392    }
393
394    fn to_str(&self, start_node: Option<usize>, symbol_table: Option<&SymbolTable>) -> String {
395        let tfmt = GrTreeFmt {
396            tree: self,
397            show_ids: false,
398            show_depth: false,
399            symbol_table,
400            start_node,
401        };
402        tfmt.to_string()
403    }
404
405    fn to_str_index(&self, start_node: Option<usize>, symbol_table: Option<&SymbolTable>) -> String {
406        let tfmt = GrTreeFmt {
407            tree: self,
408            show_ids: true,
409            show_depth: false,
410            symbol_table,
411            start_node,
412        };
413        tfmt.to_string()
414    }
415}
416
417pub struct GrTreeFmt<'a> {
418    tree: &'a GrTree,
419    show_ids: bool,
420    show_depth: bool,
421    symbol_table: Option<&'a SymbolTable>,
422    start_node: Option<usize>
423}
424
425impl<'a> GrTreeFmt<'a> {
426    fn snode(&self, node: &GrNode, node_id: usize, depth: u32) -> String {
427        let show_ids = self.show_ids;
428        let show_depth = self.show_depth;
429        let mut result = String::new();
430        if show_depth {
431            result.push_str(&depth.to_string());
432            result.push('>');
433        }
434        if show_ids {
435            result.push_str(&node_id.to_string());
436            result.push(':');
437        }
438        let name = if let GrNode::Symbol(sym) = node {
439            if self.symbol_table.is_some() { sym.to_str_quote(self.symbol_table) } else { sym.to_str(self.symbol_table) }
440        } else {
441            node.to_str(self.symbol_table)
442        };
443        result.push_str(name.as_str());
444        result
445    }
446}
447
448impl Display for GrTreeFmt<'_> {
449    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
450        if self.tree.is_empty() {
451            return write!(f, "<empty>");
452        }
453        let start_node = self.start_node.unwrap_or_else(|| self.tree.get_root().expect("the tree must have a defined root"));
454        let mut stack = Vec::<String>::new();
455        for node in self.tree.iter_post_depth_at(start_node) {
456            let n = node.num_children();
457            if n > 0 {
458                let children = stack.drain(stack.len() - n..).join(", ");
459                stack.push(format!("{}({children})", self.snode(&node, node.index, node.depth)));
460            } else {
461                stack.push(self.snode(&node, node.index, node.depth));
462            }
463        }
464        write!(f, "{}", stack.pop().unwrap_or_else(|| "empty".to_string()))
465    }
466}
467
468// ---------------------------------------------------------------------------------------------
469
470#[derive(Clone, Copy, PartialEq, Debug)]
471pub enum NTConversion {
472    /// Removed because not used
473    Removed,
474    /// + or * child moved to L-form holder
475    MovedTo(VarId)
476}
477
478#[derive(Clone, Debug)]
479pub struct RuleTreeSet<T> {
480    trees: Vec<GrTree>,
481    start: Option<VarId>,
482    symbol_table: Option<SymbolTable>,
483    flags: Vec<u32>,            // NT -> flags (+ or * normalization)
484    parent: Vec<Option<VarId>>, // NT -> parent NT
485    nt_conversion: HashMap<VarId, NTConversion>,
486    origin: Origin<(VarId, usize), FromRTS>,
487    log: BufLog,
488    _phantom: PhantomData<T>
489}
490
491impl<T> HasBuildErrorSource for RuleTreeSet<T> {
492    const SOURCE: BuildErrorSource = BuildErrorSource::RuleTreeSet;
493}
494
495// Methods for both General and Normalized forms. There can only be immutable methods
496// in the normalized form.
497impl<T> RuleTreeSet<T> {
498    pub fn get_num_nt(&self) -> VarId {
499        self.trees.len() as VarId
500    }
501    
502    pub fn get_tree(&self, var: VarId) -> Option<&GrTree> {
503        self.trees.get(var as usize)
504    }
505
506    /// Returns all the non-empty trees
507    pub fn get_trees_iter(&self) -> impl Iterator<Item=(VarId, &GrTree)> {
508        self.trees.iter().index().filter_map(|(id, t)| if t.is_empty() { None } else { Some((id, t)) })
509    }
510
511    /// Returns all the variables corresponding to a non-empty tree
512    pub fn get_vars(&self) -> impl Iterator<Item=VarId> + '_ {
513        (0..self.trees.len()).filter_map(|id| if self.trees[id].is_empty() { None } else { Some(id as VarId) })
514    }
515
516    /// Returns a variable ID that doesn't exist yet.
517    pub fn get_next_available_var(&self) -> VarId {
518        self.trees.len() as VarId
519    }
520
521    /// Returns a set of all the terminals used in the ruleset.
522    pub fn get_terminals(&self) -> HashSet<TokenId> {
523        self.trees.iter()
524            .flat_map(|t| t.iter_post_depth_simple())
525            .filter_map(|x| if let GrNode::Symbol(Symbol::T(t)) = x.deref() { Some(*t) } else { None })
526            .collect::<HashSet<_>>()
527    }
528
529    pub fn set_symbol_table(&mut self, symbol_table: SymbolTable) {
530        self.symbol_table = Some(symbol_table);
531    }
532
533    pub fn get_symbol_table(&self) -> Option<&SymbolTable> {
534        self.symbol_table.as_ref()
535    }
536
537    /// Sets the starting production rule.
538    pub fn set_start(&mut self, start: VarId) {
539        self.start = Some(start);
540    }
541
542    /// Builds the string representation of the [`rule tree set`](RuleTreeSet) of variable `var`,
543    /// optionally starting at node `node`. If `emphasis` contains a node ID, this subpart of the
544    /// tree is emphasized in the string.
545    pub fn to_str(&self, var: VarId, node: Option<usize>, emphasis: Option<usize>) -> String {
546        grtree_to_str(&self.trees[var as usize], node, emphasis, Some(var), self.get_symbol_table(), false)
547    }
548
549    pub fn get_log_mut(&mut self) -> &mut BufLog {
550        &mut self.log
551    }
552}
553
554impl<T> LogReader for RuleTreeSet<T> {
555    type Item = BufLog;
556
557    fn get_log(&self) -> &Self::Item {
558        &self.log
559    }
560
561    fn give_log(self) -> Self::Item {
562        self.log
563    }
564}
565
566impl RuleTreeSet<General> {
567    pub fn new() -> Self {
568        Self::with_log(BufLog::new())
569    }
570
571    pub fn with_log(log: BufLog) -> Self {
572        RuleTreeSet {
573            trees: Vec::new(),
574            start: None,
575            symbol_table: None,
576            flags: Vec::new(),
577            parent: Vec::new(),
578            nt_conversion: HashMap::new(),
579            origin: Origin::new(),
580            log,
581            _phantom: PhantomData,
582        }
583    }
584
585    /// Gets the tree corresponding to `var`. Creates it if it doesn't exist yet.
586    pub fn get_tree_mut(&mut self, var: VarId) -> &mut GrTree {
587        let var = var as usize;
588        if var >= self.trees.len() {
589            self.trees.resize(var + 1, GrTree::new());
590        }
591        &mut self.trees[var]
592    }
593
594    /// Sets the tree corresponding to `var`. If the variable already exists,
595    /// the tree is replaced. Otherwise, the set is enlarged to add it.
596    pub fn set_tree(&mut self, var: VarId, tree: GrTree) {
597        let var = var as usize;
598        if var >= self.trees.len() {
599            if var > self.trees.len() {
600                if self.trees.capacity() < var + 1 {
601                    // if capacity = 2, var = 3 => we need 4, so 2 more
602                    self.trees.reserve(var + 1 - self.trees.capacity())
603                }
604                self.trees.resize(var, GrTree::new());
605            }
606            self.trees.push(tree);
607        } else {
608            self.trees[var] = tree;
609        }
610    }
611
612    /// Normalizes all the production rules.
613    pub fn normalize(&mut self) {
614        self.log.add_note("original rules:");
615        (0..self.trees.len() as VarId)
616            .for_each(|v| self.log.add_note(format!("- NT[{v:2}] {} -> {}", Symbol::NT(v).to_str(self.get_symbol_table()), self.to_str(v, None, None))));
617        self.log.add_note("normalizing rules...");
618        self.check_num_nt_coherency();
619        self.normalize_vars();
620        self.flags.resize(self.trees.len(), 0);
621        self.parent.resize(self.trees.len(), None);
622        (0..self.trees.len() as VarId)
623            .for_each(|v| self.log.add_note(format!("- NT[{v:2}] {} -> {}", Symbol::NT(v).to_str(self.get_symbol_table()), self.to_str(v, None, None))));
624    }
625
626    fn check_num_nt_coherency(&mut self) {
627        if let Some(n) = self.symbol_table.as_ref().map(|table| table.get_num_nt()) {
628            if n != self.trees.len() {
629                self.log.add_error(format!("there are {} rules but the symbol table has {n} nonterminal symbols: dropping the table", self.trees.len()));
630                self.symbol_table = None;
631            }
632        }
633    }
634
635    /// Transforms the production rule tree into a list of rules in normalized format:
636    /// `var -> &(leaf_1, leaf_2, ...leaf_n)`
637    ///
638    /// The product may have to be split if operators like `+` or `*` are used. In this
639    /// case, new nonterminals are created, with increasing IDs starting from
640    /// `new_var`.
641    fn normalize_vars(&mut self) {
642        const VERBOSE: bool = false;
643        const VERBOSE_CC: bool = false;
644        let mut exclude_nt = HashSet::<VarId>::new();
645        for var in 0..self.get_num_nt() {
646            if exclude_nt.contains(&var) {
647                if VERBOSE { println!("NT[{var}] {} excluded", Symbol::NT(var).to_str(self.get_symbol_table())); }
648                continue
649            }
650            if VERBOSE { println!("normalize_var(NT[{var}] {})", Symbol::NT(var).to_str(self.get_symbol_table())); }
651            let mut new_var = self.get_next_available_var();
652            let orig = take(&mut self.trees[var as usize]);
653            let mut new = GrTree::new();
654            let mut orig_new = GrTree::new();
655            let mut orig_rep_vars = HashMap::<VarId, usize>::new();
656            let mut stack = Vec::<usize>::new();    // indices in new
657            for sym in orig.iter_post_depth() {
658                let n = sym.num_children();
659                if VERBOSE { println!("- old {}:{}", sym.index, *sym); }
660                if n == 0 {
661                    if matches!(*sym, GrNode::Maybe | GrNode::Plus | GrNode::Star) {
662                        self.log.add_error(format!(
663                            "normalize_var({}): {} must have one child; found none",
664                            Symbol::NT(var).to_str(self.get_symbol_table()), *sym));
665                        // replace with empty symbol
666                        stack.push(new.add(None, GrNode::Symbol(Symbol::Empty)));
667                    } else {
668                        let new_id = new.add(None, *orig.get(sym.index));
669                        stack.push(new_id);
670                        if VERBOSE { print!("  leaf: "); }
671                    }
672                } else {
673                    match *sym {
674                        // we must rearrange the operations so that any item on the stack is only
675                        // one of those patterns:
676                        // - a leaf
677                        // - a &(leaves)
678                        // - a |(&(leaves) or leaves)
679                        GrNode::Concat | GrNode::Or => {
680                            let children = stack.split_off(stack.len() - n);
681                            let new_id = if children.iter().all(|&idx| !matches!(new.get(idx), GrNode::Concat|GrNode::Or)) {
682                                if VERBOSE { print!("  trivial {}: children={}\n  ", *sym, children.iter().map(|s| new.get(*s).to_str(self.get_symbol_table())).join(", ")); }
683                                // trivial case with only leaves as children (could be removed and treated as a general case)
684                                new.addci_iter(None, *sym, children)
685                            } else if *sym == GrNode::Or {
686                                if VERBOSE {
687                                    // println!("  or: children={}", children.iter().map(|&id| format!("{id}:{}", grtree_to_str(&new, Some(id), None, self.get_symbol_table()))).join(", "));
688                                    println!(
689                                        "  or: children={}",
690                                        children.iter().map(|&id| new.to_str_index(Some(id), self.get_symbol_table())).join(", "));
691                                }
692                                // if parent sym is p:|
693                                // - preserving the children's order:
694                                //   - attach '|' children's children directly under p (discarding the '|' children)
695                                //   - attach '&' children under p
696                                // - push p back to stack
697                                // ex: P: AB | (C|D) | E | (FG|HI)             -> P: AB | C | D | E | FG | HI
698                                //        |(&(A,B),|(C,D),E,|(&(F,G),&(H,I)))        |(&(A,B),C,D,E,&(F,G),&(H,I))
699                                let mut new_children = Vec::new();
700                                for id in children {
701                                    match new.get(id) {
702                                        GrNode::Symbol(_) | GrNode::Concat | GrNode::Greedy => {
703                                            if VERBOSE { println!("  - child {id} is {}", new.get(id)); }
704                                            new_children.push(id);
705                                        }
706                                        GrNode::Or => {
707                                            if VERBOSE { println!("  - child {id} is | with children {:?}", new.children(id)); }
708                                            new_children.extend(new.children(id));
709                                        }
710                                        x => panic!("unexpected node type under | node: {x}"),
711                                    }
712                                }
713                                new.addci_iter(None, gnode!(|), new_children)
714                            } else { // *sym == GrNode::Concat
715                                if VERBOSE_CC { println!("  &: children={children:?}"); }
716                                // if parent sym is p:&
717                                // - merge adjacent leaves and '&' children (optional)
718                                // - cartesian product of all '|' children's children and '&' children,
719                                //       duplicating nodes are required
720                                // - add r:'|' node to tree, attaching the new '&' nodes under it
721                                // - push r to stack
722                                // ex: P: AB & (C|D) & E & (FG|H)        -> P: ABCEFG | ABCEH | ABDEFG | ABDEH
723                                //        &(&(A,B),|(C,D),E,|(&(F,G),H))      |(&(A,B,C,E,F,G),&(A,B,C,E,H),&(A,B,D,E,F,G),&(A,B,D,E,H)
724
725                                // we store the dups in an array and reference them by index, because there will be multiple instances
726                                // pointing to the same Dup and we can't do that with mutable references (which must be unique):
727                                let mut dups = Vec::<Vec<Dup>>::new();
728                                let concats_children = children.into_iter()
729                                    // iterations: &(A,B) -> |(C,D) -> E -> |(&(F,G),H))
730                                    .flat_map(|id| {
731                                        if VERBOSE_CC { print!("      FL {}: ", new.get(id)); }
732                                        match new.get(id) {
733                                            GrNode::Concat =>
734                                                new.children(id).iter().map(|idc| vec![vaddi(&mut dups, [Dup::new(*idc)])]).to_vec(),
735                                            GrNode::Or => {
736                                                let children = new.children(id).to_vec();
737                                                vec![children.into_iter().map(|idc| {
738                                                    if let GrNode::Concat = new.get(idc) {
739                                                        let idc_children = new.children(idc).iter().map(|i| Dup::new(*i)).to_vec();
740                                                        vaddi(&mut dups, idc_children)
741                                                    } else {
742                                                        vaddi(&mut dups, [Dup::new(idc)])
743                                                    }
744                                                }).to_vec()]
745                                            }
746                                            _ => vec![vec![vaddi(&mut dups, [Dup::new(id)])]],
747                                        }
748                                    })
749                                    // [d(A)] -> [d(B)] -> [d(C),d(D)] -> [d(E)] -> [d(&(d(F),d(G))),d(H)]
750                                    // .inspect(|x| println!("      >> {}", x.iter().map(|i| format!("_{i}")).join(", ")))
751                                    .cproduct()
752                                    // .inspect(|x| println!("      << {}", x.iter().map(|i| format!("_{i}")).join(", ")))
753                                    // [dup(A),dup(B),dup(C),dup(E),d(&)] -> [dup(A),dup(B),dup(C),dup(E),d(H)] ->
754                                    //       [dup(A),dup(B),dup(D),dup(E),d(&)] -> [dup(A),dup(B),dup(D),dup(E),d(H)]
755                                    .map(|dup_ids| {
756                                        let mut nodes = dup_ids.into_iter()
757                                            .flat_map(|dup_id| dups.get_mut(dup_id).unwrap().iter_mut()
758                                                .map(|dup| new.get_dup(dup)).to_vec()).to_vec();
759                                        remove_concat_dup_empty(&new, &mut nodes);
760                                        nodes
761                                    })
762                                    // .inspect(|x| println!("      :: {}", x.iter().map(|i| format!("{i}")).join(", ")))
763                                    .to_vec();
764                                // [A,B,C,E,F,G] -> [A',B',C',E',H] -> [A'',B'',D,E'',F',G'] -> [A''',B''',D',E''',H']
765                                let concats = concats_children.into_iter()
766                                    .map(|children_ids| new.addci_iter(None, gnode!(&), children_ids))
767                                    .to_vec();
768                                // Vec<node id of &-branch>
769                                new.addci_iter(None, gnode!(|), concats)
770                            };
771                            stack.push(new_id);
772                        }
773                        GrNode::Maybe => {
774                            if n != 1 {
775                                self.log.add_error(format!("normalize_var({}): ? must have one child; found {n}: {}",
776                                                           Symbol::NT(var).to_str(self.get_symbol_table()),
777                                                           orig.to_str(Some(sym.index), self.get_symbol_table())));
778                            } else {
779                                // self              new
780                                // -------------------------------
781                                // ?(A)           -> |(A,ε)
782                                // ?(&(A,B))      -> |(&(A,B),ε)
783                                // ?(|(&(A,B),C)) -> |(&(A,B),C,ε)
784                                if VERBOSE { print!("  ?: "); }
785                                let maybe_child = stack.pop().unwrap();
786                                let proceed = match grtree_cleanup(&mut new, Some(maybe_child), true) {
787                                    None => {
788                                        self.log.add_error(format!(
789                                            "unexpected child of ?: {} (should be &, |, or symbol)",
790                                            grtree_to_str(&new, Some(maybe_child), None, Some(var), self.get_symbol_table(), false)));
791                                        if VERBOSE { println!("ERROR: unexpected child of ?: {}", grtree_to_str(&new, Some(maybe_child), None, Some(var), self.get_symbol_table(), false)); }
792                                        return;
793                                    }
794                                    // (is_empty, had_empty_term)
795                                    Some((true, _)) => {
796                                        // the child is `ε`
797                                        stack.push(maybe_child);
798                                        if VERBOSE { println!("child of ? simplified to ε"); }
799                                        false
800                                    }
801                                    _ => true,
802                                };
803                                if proceed {
804                                    let empty = new.add(None, gnode!(e));
805                                    let id = match new.get(maybe_child) {
806                                        GrNode::Or => {
807                                            new.add(Some(maybe_child), gnode!(e));
808                                            maybe_child
809                                        }
810                                        _ => new.addci_iter(None, gnode!(|), [maybe_child, empty])
811                                    };
812                                    stack.push(id);
813                                }
814                            }
815                        }
816                        GrNode::Plus | GrNode::Star => {
817                            // + can change to *, so we treat both of them at the same time
818                            //
819                            // P -> αβ+γ becomes P -> αQγ
820                            //                   Q -> βQ | β
821                            //
822                            //     self              new  new(Q=next_var_id)               simpler format
823                            //     ----------------------------------------------------------------------------------------
824                            //     +(A)           -> Q    |(&(A,Q), A')                    AQ|A
825                            //     +(&(A,B))      -> Q    |(&(A,B,Q),&(A',B'))             ABQ|AB
826                            //     +(|(&(A,B),C)) -> Q    |(&(A,B,Q),&(C,Q'),&(A',B'),C')  (AB|C)Q | (AB|C) = ABQ|CQ | AB|C
827                            //
828                            // P -> αβ*γ becomes P -> αQγ
829                            //                   Q -> βQ | ε
830                            //
831                            //     self              new  new(Q=next_var_id)     simpler format
832                            //     -----------------------------------------------------------------------
833                            //     *(A)           -> Q    |(&(A,Q), ε)           AQ|ε
834                            //     *(&(A,B))      -> Q    |(&(A,B,Q),ε)          ABQ|ε
835                            //     *(|(&(A,B),C)) -> Q    |(&(A,B,Q),&(C,Q'),ε)  (AB|C)Q | ε = ABQ|CQ | ε
836                            let mut is_plus = *sym == GrNode::Plus;
837                            let sym_char = if is_plus { '+' } else { '*' };
838                            if VERBOSE { print!("  {sym_char}: "); }
839                            if n != 1 {
840                                self.log.add_error(format!(
841                                    "normalize_var({}): {sym_char} must have one child; found {n}: {}",
842                                    Symbol::NT(var).to_str(self.get_symbol_table()),
843                                    orig.to_str(Some(sym.index), self.get_symbol_table())));
844                                if VERBOSE { println!("ERROR: found {n} children instead of 1"); }
845                                return;
846                            }
847                            let rep_child = stack.pop().unwrap();
848                            let proceed = match grtree_cleanup(&mut new, Some(rep_child), true) {
849                                None => {
850                                    self.log.add_error(format!(
851                                        "unexpected child of {sym_char}: {} (should be &, |, or symbol)",
852                                        grtree_to_str(&new, Some(rep_child), None, Some(var), self.get_symbol_table(), false)));
853                                    if VERBOSE { println!("ERROR: unexpected child {}", grtree_to_str(&new, Some(rep_child), None, Some(var), self.get_symbol_table(), false)); }
854                                    return;
855                                }
856                                // (is_empty, had_empty_term)
857                                Some((true, _)) => {
858                                    // the child is `ε`
859                                    stack.push(rep_child);
860                                    if VERBOSE { println!("child simplified to ε"); }
861                                    false
862                                }
863                                Some((false, true)) => {
864                                    // the child had the form `α + ε` and is now `α`, so if the operator was +,
865                                    // it must become * since empty must remain a possibility (it doesn't change
866                                    // (anything if it was already *)
867                                    if is_plus {
868                                        is_plus = false;
869                                        // sym_char = '*';
870                                        if VERBOSE { print!(" becomes * (child lost ε term), "); }
871                                    }
872                                    true
873                                }
874                                Some((false, false)) => true, // nothing special, processing below
875                            };
876                            if proceed {
877                                if VERBOSE { println!("=> {}", grtree_to_str(&new, Some(rep_child), None, Some(var), self.get_symbol_table(), false)); }
878                                let orig_rep = orig_new.add(None, if is_plus { gnode!(+) } else { gnode!(*) });
879                                let orig_rep_child = orig_new.add_from_tree(Some(orig_rep), &new, Some(rep_child));
880                                let (id, qvar) = self.normalize_plus_or_star(
881                                    rep_child, orig_rep, orig_rep_child, &mut new, &mut orig_new, var, &mut new_var, is_plus, &mut exclude_nt);
882                                stack.push(id);
883                                orig_rep_vars.insert(qvar, orig_rep); // to replace later
884                            }
885                        }
886                        _ => panic!("Unexpected {}", sym.deref())
887                    }
888                }
889                if VERBOSE {
890                    println!("stack: {}", stack.iter()
891                        .map(|id| {
892                            let children = new.children(*id);
893                            format!("{id}:{}{}", new.get(*id), if children.is_empty() { "".to_string() } else { format!("({})", children.iter().join(",")) })
894                        }).join(", ")
895                    );
896                }
897            }
898            if stack.len() != 1 {
899                self.log.add_error(format!("normalize_var({}): error while normalizing the rules, {} remaining nodes instead of 1",
900                                           Symbol::NT(var).to_str(self.get_symbol_table()), stack.len()));
901                return;
902            }
903            if VERBOSE_CC { println!("Final stack id: {}", stack[0]); }
904            let root = stack.pop().unwrap();
905            new.set_root(root);
906            match grtree_cleanup(&mut new, None, false) {
907                None => {
908                    self.log.add_error(format!(
909                        "unexpected root of {} -> {} (should be &, |, or symbol)",
910                        Symbol::NT(var).to_str(self.get_symbol_table()),
911                        grtree_to_str(&new, None, None, Some(var), self.get_symbol_table(), false)));
912                    if VERBOSE { println!("ERROR: unexpected root {}", grtree_to_str(&new, None, None, Some(var), self.get_symbol_table(), false)); }
913                }
914                // (is_empty, had_empty_term)
915                Some((true, _)) => {
916                    self.log.add_warning(format!("{} is empty", Symbol::NT(var).to_str(self.get_symbol_table())));
917                }
918                _ => {}
919            }
920            let orig_root = orig_new.add_from_tree_callback(None, &new, None, |from, to, _| self.origin.add((var, to), (var, from)));
921            orig_new.set_root(orig_root);
922            while !orig_rep_vars.is_empty() {
923                // We must replace new nonterminals with their original (though normalized) +* content, but we can't
924                // update `orig_new` while we're iterating it, so we proceed in two steps:
925                // - iterate in `orig_new`, locate the new +* nonterminals and put the node indices in orig_rep_nodes
926                // - iterate orig_rep_nodes and modify nodes in orig_new
927                // Since each replacement can make new nonterminals visible (if they're embedded in one another),
928                // we must repeat those steps until all `orig_rep_vars` have been found and replaced.
929                let mut orig_rep_nodes = Vec::<(usize, usize)>::new();
930                let mut to_remove = Vec::<VarId>::new();
931                for node in orig_new.iter_post_depth() {
932                    if let GrNode::Symbol(Symbol::NT(rep_var)) = node.deref() {
933                        if let Some(&orig_rep_id) = orig_rep_vars.get(rep_var) {
934                            to_remove.push(*rep_var);
935                            orig_rep_nodes.push((node.index, orig_rep_id));
936                            self.origin.add((*rep_var, self.get_tree(*rep_var).unwrap().get_root().unwrap()), (var, orig_rep_id));
937                        }
938                    }
939                }
940                for (orig_id, child_id) in orig_rep_nodes {
941                    *orig_new.get_mut(orig_id) = gnode!(inst);
942                    orig_new.attach_child(orig_id, child_id);
943                }
944                for var in to_remove {
945                    orig_rep_vars.remove(&var);
946                }
947            }
948            self.origin.set_tree(var, orig_new);
949            self.set_tree(var, new);
950        }
951    }
952
953    fn normalize_plus_or_star(
954        &mut self, rep_child: usize, orig_rep: usize, orig_rep_child: usize,
955        new: &mut GrTree, orig_new: &mut GrTree, var: VarId, new_var: &mut VarId, is_plus: bool,
956        exclude_nt: &mut HashSet<VarId>
957    ) -> (usize, VarId)
958    {
959        const VERBOSE: bool = false;
960        const OPTIMIZE_SUB_OR: bool = false;
961        if let Some(st) = self.symbol_table.as_ref() {
962            assert_eq!(st.get_num_nt(), self.trees.len(), "number of nt in symbol table doesn't match num_nt");
963        }
964        let (mut qvar, mut rvar) = (*new_var, *new_var + 1);
965        let mut qtree = GrTree::new();
966        let mut rtree = GrTree::new();
967        let mut use_rtree = false;
968
969        // finds possible occurences of <L=var>, detects conflicts, and updates qvar/rvar if necessary
970        let mut lform_nt = None;
971        for node in orig_new.iter_post_depth_at(orig_rep_child) {
972            if let GrNode::LForm(v) = *node {
973                if matches!(lform_nt, Some(v2) if v != v2) {
974                    let symtab = self.get_symbol_table();
975                    self.log.add_error(
976                        format!("in {}, {}: conflicting <L={}> and <L={}>",
977                                Symbol::NT(var).to_str(symtab),
978                                grtree_to_str(orig_new, Some(orig_rep), None, Some(var), symtab, false),
979                                Symbol::NT(lform_nt.unwrap()).to_str(symtab), Symbol::NT(v).to_str(symtab)));
980                } else {
981                    lform_nt = Some(v);
982                    (qvar, rvar) = (v, *new_var);
983                }
984            }
985        }
986
987        // See comments in `normalize_var` near the calls to this method for details about the operations below.
988        // We copy from the origin tree `orig_new` to trace the new node IDs to the original ones.
989        match orig_new.get(orig_rep_child) {
990            GrNode::Symbol(s) => {
991                if VERBOSE { print!("({rep_child}:{s}) "); }
992                // note: we cannot use the child id in qtree!
993                let or = qtree.add_root(gnode!(|));
994                let cc = qtree.add(Some(or), gnode!(&));
995                let child = qtree.add(Some(cc), GrNode::Symbol(*s));
996                qtree.add(Some(cc), gnode!(nt qvar));
997                let child2 = qtree.add(Some(or), if is_plus { GrNode::Symbol(*s) } else { gnode!(e) });
998                self.origin.add((qvar, child), (var, orig_rep_child));     // useful?
999                self.origin.add((qvar, cc), (var, orig_rep_child));
1000                if is_plus {
1001                    self.origin.add((qvar, child2), (var, orig_rep_child));
1002                }
1003            }
1004            GrNode::Concat => {
1005                let id_grchildren = new.children(rep_child);
1006                if VERBOSE { print!("({rep_child}:&({})) ", id_grchildren.iter().join(", ")); }
1007                let or = qtree.add_root(gnode!(|));
1008                let cc1 = qtree.add_from_tree_callback(Some(or), orig_new, Some(orig_rep_child), |to, from, _n| {
1009                    self.origin.add((qvar, to), (var, from))
1010                });
1011                let loop_id = qtree.add(Some(cc1), gnode!(nt qvar));
1012                self.origin.add((qvar, loop_id), (var, orig_rep));
1013                if is_plus {
1014                    let loop_id2 = qtree.add_from_tree(Some(or), new, Some(rep_child));
1015                    self.origin.add((qvar, loop_id2), (var, orig_rep_child));
1016                } else {
1017                    qtree.add(Some(or), gnode!(e));
1018                }
1019            }
1020            #[allow(unreachable_patterns)]
1021            GrNode::Or => if !OPTIMIZE_SUB_OR {
1022                let id_grchildren = new.children(rep_child);
1023                if VERBOSE { print!("({rep_child}:|({})) ", id_grchildren.iter().join(", ")); }
1024                let orig_id_grchildren = orig_new.children(orig_rep_child);
1025                let or = qtree.add_root(gnode!(|));
1026                for orig_id_grchild in orig_id_grchildren {
1027                    let orig_grchild = orig_new.get(*orig_id_grchild);
1028                    match orig_grchild {
1029                        GrNode::Symbol(s) => {
1030                            let cc = qtree.add(Some(or), gnode!(&));
1031                            let child = qtree.add_iter(Some(cc), [GrNode::Symbol(*s), gnode!(nt qvar)])[0];
1032                            self.origin.add((qvar, cc), (var, *orig_id_grchild));
1033                            self.origin.add((qvar, child), (var, *orig_id_grchild));
1034                            if is_plus {
1035                                let plus_or = qtree.add(Some(or), GrNode::Symbol(*s));
1036                                self.origin.add((qvar, plus_or), (var, *orig_id_grchild));
1037                            }
1038                        }
1039                        GrNode::Concat => {
1040                            let cc = qtree.add_from_tree_callback(Some(or), orig_new, Some(*orig_id_grchild), |to, from, _n| {
1041                                self.origin.add((qvar, to), (var, from));
1042                            });
1043                            qtree.add(Some(cc), gnode!(nt qvar));
1044                            if is_plus {
1045                                qtree.add_from_tree_callback(Some(or), orig_new, Some(*orig_id_grchild), |to, from, _| {
1046                                    self.origin.add((qvar, to), (var, from));
1047                                });
1048                            }
1049                        }
1050                        x => panic!("unexpected node type under a | node: {x}"),
1051                    }
1052                }
1053                if !is_plus {
1054                    qtree.add(Some(or), gnode!(e));
1055                }
1056            }
1057            // TODO: remove this optimization?
1058            #[allow(unreachable_patterns)]
1059            GrNode::Or => if OPTIMIZE_SUB_OR {
1060                // P -> αβ*γ becomes P -> αQγ         P -> α(β)+γ becomes P -> αQγ
1061                //                   Q -> βQ | ε                          Q -> βR
1062                // β can be β1|β2|..., which is distributed               R -> Q | ε
1063                //
1064                // self             new(Q=new_var)                   simpler format
1065                // ---------------------------------------------------------------------------------
1066                // *(|(&(A,B),C))   |(&(A,B,Q),&(C,Q'),ε)            Q -> ABQ|CQ | ε
1067                // +(|(&(A,B),C))   |(&(A,B,Q),&(C,Q'),&(A',B'),C')  Q -> ABQ|CQ | AB|C
1068                //
1069                // new method:
1070                // *(|(&(A,B),C))   |(&(A,B,Q),&(C,Q'),ε)            Q -> ABQ|CQ | ε
1071                // +(|(&(A,B),C))   |(&(A,B,Q),&(C,Q'))              Q -> ABR|CR         R -> Q | ε
1072                let id_grchildren = new.children(rep_child);
1073                if VERBOSE { print!("({rep_child}:|({})) ", id_grchildren.iter().join(", ")); }
1074                let orig_id_grchildren = orig_new.children(orig_rep_child);
1075                let or = qtree.add_root(gnode!(|));
1076                for orig_id_grchild in orig_id_grchildren {
1077                    let orig_grchild = new.get(*orig_id_grchild);
1078                    match orig_grchild {
1079                        GrNode::Symbol(s) => {
1080                            if is_plus {
1081                                qtree.addc_iter(Some(or), gnode!(&), [GrNode::Symbol(*s), gnode!(nt rvar)]);
1082                                use_rtree = true;
1083                            } else {
1084                                qtree.addc_iter(Some(or), gnode!(&), [GrNode::Symbol(*s), gnode!(nt qvar)]);
1085                            }
1086                        }
1087                        GrNode::Concat => {
1088                            let cc = qtree.add_from_tree_callback(Some(or), orig_new, Some(*orig_id_grchild), |to, from, _n| {
1089                                self.origin.add((qvar, to), (var, from));
1090                            });
1091                            if is_plus {
1092                                qtree.add(Some(cc), gnode!(nt rvar));
1093                            } else {
1094                                qtree.add(Some(cc), gnode!(nt qvar));
1095                            }
1096                        }
1097                        x => panic!("unexpected node type under a | node: {x}"),
1098                    }
1099                }
1100                if use_rtree {
1101                    let or1 = rtree.add_root(gnode!(|));
1102                    rtree.add_iter(Some(or1), [gnode!(nt qvar), gnode!(e)]);
1103                } else if !is_plus {
1104                    qtree.add(Some(or), gnode!(e));
1105                }
1106            }
1107            _ => panic!("Unexpected node type under a + node: {}", new.get(rep_child))
1108        }
1109        if let Some(v) = lform_nt {
1110            // `new_var` replaces `v`
1111            if v == var {
1112                self.log.add_error(
1113                    format!("in {}, {}: <L> points to the same nonterminal. It must be a new one, created for the loop.",
1114                            Symbol::NT(var).to_str(self.get_symbol_table()),
1115                            grtree_to_str(orig_new, Some(orig_rep), None, Some(var), self.get_symbol_table(), false)));
1116            } else {
1117                // self.nt_conversion.insert(v, MovedTo(qvar));
1118                for mut node in qtree.iter_post_depth_simple_mut() {
1119                    if let GrNode::LForm(v2) = node.deref_mut() {
1120                        if *v2 == v { *v2 = qvar; }
1121                    }
1122                }
1123                for mut node in orig_new.iter_post_depth_simple_at_mut(orig_rep_child) {
1124                    if let GrNode::LForm(v2) = node.deref_mut() {
1125                        if *v2 == v { *v2 = qvar; }
1126                    }
1127                }
1128            }
1129        }
1130        if let Some(st) = self.symbol_table.as_mut() {
1131            if lform_nt.is_none() {
1132                assert_eq!(st.add_child_nonterminal(var), qvar);
1133            }
1134            if use_rtree {
1135                assert_eq!(st.add_child_nonterminal(var), rvar);
1136            }
1137        }
1138        let id = new.add(None, gnode!(nt qvar));
1139        assert!(qvar as usize >= self.trees.len() || self.trees[qvar as usize].is_empty(), "overwriting tree {new_var}");
1140        if VERBOSE { println!("qtree: NT[{qvar}] {} -> {}", Symbol::NT(qvar).to_str(self.get_symbol_table()), grtree_to_str(&qtree, None, None, Some(qvar), self.get_symbol_table(), false) /*qtree.to_str(None, self.get_symbol_table())*/); }
1141        self.set_tree(qvar, qtree);
1142        exclude_nt.insert(qvar);
1143        self.flags.resize(rvar as usize, 0);
1144        self.parent.resize(rvar as usize, None);
1145        let plus_flag = if is_plus { ruleflag::REPEAT_PLUS } else { 0 };
1146        self.flags[qvar as usize] = ruleflag::CHILD_REPEAT | plus_flag;
1147        self.flags[var as usize] |= ruleflag::PARENT_REPEAT | plus_flag;
1148        self.parent[qvar as usize] = Some(var);
1149        if use_rtree {
1150            if VERBOSE { println!("rtree: NT[{rvar}] {} -> {}", Symbol::NT(rvar).to_str(self.get_symbol_table()), grtree_to_str(&rtree, None, None, Some(rvar), self.get_symbol_table(), false)); }
1151            self.set_tree(rvar, rtree);
1152            exclude_nt.insert(var);
1153            self.flags.resize(rvar as usize + 1, 0);
1154            self.parent.resize(rvar as usize + 1, None);
1155            self.flags[rvar as usize] |= ruleflag::CHILD_L_FACT;
1156            self.parent[rvar as usize] = Some(qvar);
1157            self.flags[qvar as usize] |= ruleflag::PARENT_L_FACTOR;
1158        }
1159        if VERBOSE {
1160            println!("=> new sizes, flags = {}, parent = {}, trees = {} (new_var = {new_var})", self.flags.len(), self.parent.len(), self.trees.len());
1161            println!("=> {}: parent {}, child {}{}",
1162                     if is_plus { "+" } else { "*" },
1163                     Symbol::NT(var).to_str(self.get_symbol_table()),
1164                     Symbol::NT(qvar).to_str(self.get_symbol_table()),
1165                     if use_rtree { format!(", child {}", Symbol::NT(rvar).to_str(self.get_symbol_table())) } else { String::new() }
1166            );
1167        }
1168        // We rectify the parent/child relationship in case of cascaded + or *. Since we perform a
1169        // bottom-up reconstruction, a rule like A -> ( ( a )+ b )+ will yield
1170        //   A -> A_2
1171        //   A_1 -> a A_1 | ε       parent: A
1172        //   A_2 -> A_1 b A_2 | ε   parent: A
1173        // We want A_1's parent to be A_2. We keep the wrong order in the parent chain: A_1 -> A_2 -> A,
1174        // which is unfortunate in some later tests but still easier than changing everything here.
1175        let mut rectify_maybe = None;
1176        for node in self.get_tree(qvar).unwrap().iter_post_depth_simple() {
1177            if let GrNode::Symbol(Symbol::NT(child)) = node.deref() {
1178                if *child != qvar && self.flags[*child as usize] & ruleflag::CHILD_REPEAT != 0 {
1179                    rectify_maybe = Some(*child);
1180                    break;
1181                }
1182            }
1183        }
1184        if let Some(child) = rectify_maybe {
1185            self.parent[child as usize] = Some(qvar);
1186            self.flags[qvar as usize] |= ruleflag::PARENT_REPEAT;
1187            if VERBOSE {
1188                println!("=> rectify {}'s parent as {}",
1189                         Symbol::NT(child).to_str(self.get_symbol_table()),
1190                         Symbol::NT(qvar).to_str(self.get_symbol_table()));
1191            }
1192        }
1193        *new_var = self.get_next_available_var();
1194        (id, qvar)
1195    }
1196}
1197
1198// Mutable methods for the General form.
1199impl Default for RuleTreeSet<General> {
1200    fn default() -> Self {
1201        Self::new()
1202    }
1203}
1204
1205impl BuildFrom<RuleTreeSet<General>> for RuleTreeSet<Normalized> {
1206    /// Transforms a `General` ruleset to a `Normalized` ruleset
1207    ///
1208    /// If an error is encountered or was already encountered before, an empty shell object
1209    /// is built with the log detailing the error(s).
1210    fn build_from(mut rules: RuleTreeSet<General>) -> Self {
1211        // We handle the errors by transmitting the log to the next construct rather than returning a `Result` type.
1212        // This allows to cascade the transforms without getting a complicated error resolving system while preserving
1213        // the information about the errors easily.
1214        if rules.log.has_no_errors() {
1215            rules.normalize();
1216        }
1217        RuleTreeSet::<Normalized> {
1218            trees: rules.trees,
1219            start: rules.start,
1220            symbol_table: rules.symbol_table,
1221            flags: rules.flags,
1222            parent: rules.parent,
1223            nt_conversion: rules.nt_conversion,
1224            origin: rules.origin,
1225            log: rules.log,
1226            _phantom: PhantomData
1227        }
1228    }
1229}
1230
1231// impl BuildFrom<RuleTreeSet<Normalized>> for RuleTreeSet<General> {
1232//     /// Transforms a `Normalized` ruleset to a `General` ruleset
1233//     fn build_from(mut rules: RuleTreeSet<Normalized>) -> Self {
1234//         RuleTreeSet::<General> { trees: rules.trees, next_var: rules.next_var, _phantom: PhantomData }
1235//     }
1236// }
1237
1238// ---------------------------------------------------------------------------------------------
1239// Macros
1240
1241pub mod macros {
1242    /// Generates a `GrNode` instance.
1243    ///
1244    /// # Examples
1245    /// ```
1246    /// # use lexigram_lib::{TokenId, VarId, gnode, parser::Symbol};
1247    /// # use lexigram_lib::grammar::GrNode;
1248    /// assert_eq!(gnode!([1]), GrNode::Symbol(Symbol::T(1 as TokenId)));
1249    /// assert_eq!(gnode!(t 2), GrNode::Symbol(Symbol::T(2 as TokenId)));
1250    /// assert_eq!(gnode!(nt 3), GrNode::Symbol(Symbol::NT(3 as VarId)));
1251    /// assert_eq!(gnode!(e), GrNode::Symbol(Symbol::Empty));
1252    /// assert_eq!(gnode!(end), GrNode::Symbol(Symbol::End));
1253    /// assert_eq!(gnode!(&), GrNode::Concat);
1254    /// assert_eq!(gnode!(|), GrNode::Or);
1255    /// assert_eq!(gnode!(?), GrNode::Maybe);
1256    /// assert_eq!(gnode!(+), GrNode::Plus);
1257    /// assert_eq!(gnode!(*), GrNode::Star);
1258    /// assert_eq!(gnode!(L 3), GrNode::LForm(3));
1259    /// assert_eq!(gnode!(R), GrNode::RAssoc);
1260    /// assert_eq!(gnode!(P), GrNode::PrecEq);
1261    /// assert_eq!(gnode!(inst), GrNode::Instance);
1262    /// assert_eq!(gnode!(G), GrNode::Greedy);
1263    /// ```
1264    #[macro_export]
1265    macro_rules! gnode {
1266        ([$id:expr]) => { gnode!(t $id) };
1267        (t $id:expr) => { $crate::grammar::GrNode::Symbol($crate::parser::Symbol::T($id as $crate::TokenId)) };
1268        (nt $id:expr) => { $crate::grammar::GrNode::Symbol($crate::parser::Symbol::NT($id as $crate::VarId)) };
1269        (e) => { $crate::grammar::GrNode::Symbol($crate::parser::Symbol::Empty) };
1270        (end) => { $crate::grammar::GrNode::Symbol($crate::parser::Symbol::End) };
1271        //
1272        (&) => { $crate::grammar::GrNode::Concat };
1273        (|) => { $crate::grammar::GrNode::Or };
1274        (?) => { $crate::grammar::GrNode::Maybe };
1275        (+) => { $crate::grammar::GrNode::Plus };
1276        (*) => { $crate::grammar::GrNode::Star };
1277        (L $id:expr) => { $crate::grammar::GrNode::LForm($id) };
1278        (R) => { $crate::grammar::GrNode::RAssoc };
1279        (P) => { $crate::grammar::GrNode::PrecEq };
1280        (inst) => { $crate::grammar::GrNode::Instance };
1281        (G) => { $crate::grammar::GrNode::Greedy };
1282    }
1283
1284    /// Generates a `Symbol` instance.
1285    ///
1286    /// # Examples
1287    /// ```
1288    /// # use lexigram_lib::{TokenId, VarId, sym};
1289    /// # use lexigram_lib::parser::Symbol;
1290    /// assert_eq!(sym!(t 2), Symbol::T(2 as TokenId));
1291    /// assert_eq!(sym!(nt 3), Symbol::NT(3 as VarId));
1292    /// assert_eq!(sym!(e), Symbol::Empty);
1293    /// assert_eq!(sym!(end), Symbol::End);
1294    #[macro_export]
1295    macro_rules! sym {
1296        (t $id:expr) => { $crate::parser::Symbol::T($id as $crate::TokenId) };
1297        (nt $id:expr) => { $crate::parser::Symbol::NT($id as $crate::VarId) };
1298        (e) => { $crate::parser::Symbol::Empty };
1299        (end) => { $crate::parser::Symbol::End };
1300    }
1301
1302    #[macro_export]
1303    macro_rules! altflag {
1304        (L) => { $crate::alt::ruleflag::L_FORM };
1305        (R) => { $crate::alt::ruleflag::R_ASSOC };
1306        (G) => { $crate::alt::ruleflag::GREEDY };
1307        (P) => { $crate::alt::ruleflag::PREC_EQ };
1308        ($f:expr) => { $f };
1309    }
1310
1311    /// Generates a production rule alternative. An alternative is made up of symbols separated by a comma.
1312    /// Each symbol is either
1313    /// - a non-terminal: `nt` {integer}
1314    /// - a terminal: `t` {integer}
1315    /// - the empty symbol: `e`
1316    ///
1317    /// Preceding an alternative with `# {integer}` sets a flag value on that alternative. The values are:
1318    /// - 128: L-form (low-latency parsing of that alternative)
1319    /// - 256: R-assoc (right-associative - by default, ambiguous alternatives like 'E * E' are left-associative)
1320    ///
1321    /// # Example
1322    /// ```
1323    /// # use lexigram_core::TokenId;
1324    /// # use lexigram_core::alt::Alternative;
1325    /// # use lexigram_lib::{alt, sym};
1326    /// assert_eq!(alt!(nt 1, t 2, e), Alternative::new(vec![sym!(nt 1), sym!(t 2), sym!(e)]));
1327    /// assert_eq!(alt!(#128, nt 1, t 2, e), Alternative::new(vec![sym!(nt 1), sym!(t 2), sym!(e)]).with_flags(128));
1328    /// assert_eq!(alt!(#L, nt 1, t 2, e), Alternative::new(vec![sym!(nt 1), sym!(t 2), sym!(e)]).with_flags(128));
1329    /// let x = 256;
1330    /// let o_id = 4;
1331    /// assert_eq!(alt!(#(x, o_id), nt 0, t 1, e), Alternative::new(vec![sym!(nt 0), sym!(t 1), sym!(e)]).with_flags(256).with_ambig_alt_id(4));
1332    /// ```
1333    #[macro_export]
1334    macro_rules! alt {
1335        () => { $crate::alt::Alternative::new(std::vec![]) };
1336        ($($a:ident $($b:expr)?,)+) => { alt!($($a $($b)?),+) };
1337        ($($a:ident $($b:expr)?),*) => { $crate::alt::Alternative::new(std::vec![$($crate::sym!($a $($b)?)),*]) };
1338        (#$f:literal, $($a:ident $($b:expr)?,)+) => { alt!(#$f, $($a $($b)?),+) };
1339        (#$f:literal, $($a:ident $($b:expr)?),*) => { $crate::alt::Alternative::new(std::vec![$($crate::sym!($a $($b)?)),*]).with_flags($f) };
1340        // (#$f:ident, $(%($v:expr, $id:expr),)? $($a:ident $($b:expr)?,)+) => { alt!(#$f, $(%($v, $id),)? $($a $($b)?),+) };
1341        // (#$f:ident, $(%($v:expr, $id:expr),)? $($a:ident $($b:expr)?),*)
1342        //     => {$crate::alt::Alternative::new(std::vec![$($crate::sym!($a $($b)?)),*]).with_flags($crate::altflag!($f))$(.with_orig($v, $id))? };
1343        ($(#$f:ident,)? $(%($v:expr, $id:expr),)? $($a:ident $($b:expr)?,)+) => { alt!($(#$f,)? $(%($v, $id),)? $($a $($b)?),+) };
1344        ($(#$f:ident,)? $(%($v:expr, $id:expr),)? $($a:ident $($b:expr)?),*)
1345            => { $crate::alt::Alternative::new(std::vec![$($crate::sym!($a $($b)?)),*])$(.with_flags($crate::altflag!($f)))?$(.with_origin($v, $id))? };
1346        // TODO: change "#" parts below
1347        (#($f:expr, $o:expr), $(%($v:expr, $id:expr),)? $($a:ident $($b:expr)?,)+)
1348            => { alt!(#($f, $o), $(%($v, $id),)? $($a $($b)?),+) };
1349        (#($f:expr, $o:expr), $(%($v:expr, $id:expr),)? $($a:ident $($b:expr)?),*)
1350            => { $crate::alt::Alternative::new(std::vec![$($crate::sym!($a $($b)?)),*]).with_flags($crate::altflag!($f)).with_ambig_alt_id($o)$(.with_origin($v, $id))? };
1351        (%($v:expr, $id:expr), $($a:ident $($b:expr)?,)+)
1352            => { alt!(%($v, $id), $($a $($b)?),+) };
1353        (%($v:expr, $id:expr), $($a:ident $($b:expr)?),*)
1354            => { $crate::alt::Alternative::new(std::vec![$($crate::sym!($a $($b)?)),*]).with_flags($crate::altflag!($f)).with_origin($v, $id) };
1355    }
1356
1357    #[macro_export]
1358    macro_rules! symbols {
1359        () => { std::vec![] };
1360        ($($a:ident $($b:literal $(: $num:expr)?)?,)+) => { symbols![$($a $($b $(: $num)?)?),+] };
1361        ($($a:ident $($b:literal $(: $num:expr)?)?),*) => { std::vec![$($crate::sym!($a $($b $(: $num)?)?)),*] };
1362    }
1363
1364    /// Generates a production rule. It is made up of alternatives separated by a semicolon.
1365    ///
1366    /// Example
1367    /// ```
1368    /// # use lexigram_lib::{TokenId, VarId, parser::Symbol};
1369    /// # use lexigram_core::alt::Alternative;
1370    /// # use lexigram_lib::{prule, alt, sym};
1371    /// assert_eq!(prule!(nt 1, t 2, nt 1, t 3; nt 2; e),
1372    ///            vec![Alternative::new(vec![sym!(nt 1), sym!(t 2), sym!(nt 1), sym!(t 3)]),
1373    ///                 Alternative::new(vec![sym!(nt  2)]),
1374    ///                 Alternative::new(vec![sym!(e)])]);
1375    /// assert_eq!(prule!(nt 1, t 2, nt 1, t 3; #128, nt 2; e),
1376    ///            vec![Alternative::new(vec![sym!(nt 1), sym!(t 2), sym!(nt 1), sym!(t 3)]),
1377    ///                 Alternative::new(vec![sym!(nt  2)]).with_flags(128),
1378    ///                 Alternative::new(vec![sym!(e)])]);
1379    /// assert_eq!(prule!(nt 1, t 2, nt 1, t 3; #L, nt 2; e),
1380    ///            vec![Alternative::new(vec![sym!(nt 1), sym!(t 2), sym!(nt 1), sym!(t 3)]),
1381    ///                 Alternative::new(vec![sym!(nt  2)]).with_flags(128),
1382    ///                 Alternative::new(vec![sym!(e)])]);
1383    /// ```
1384    #[macro_export]
1385    macro_rules! prule {
1386        () => { std::vec![] };
1387        ($($(#$f:literal,)? $($a:ident $($b:expr)?),*;)+) => { prule![$($(#$f,)? $($a $($b)?),+);+] };
1388        ($($(#$f:literal,)? $($a:ident $($b:expr)?),*);*) => { std::vec![$($crate::alt![$(#$f,)? $($a $($b)?),+]),*]};
1389        ($($(#$f:ident,)? $(%($v:expr, $id:expr),)? $($a:ident $($b:expr)?),*;)+) => { prule![$($(#$f,)? $(%($v, $id),)? $($a $($b)?),+);+] };
1390        ($($(#$f:ident,)? $(%($v:expr, $id:expr),)? $($a:ident $($b:expr)?),*);*) => { std::vec![$($crate::alt![$(#$f,)? $(%($v, $id),)? $($a $($b)?),+]),*]};
1391        // TODO: change "#" part below
1392        ($($(#($f:expr, $o:expr),)? $(%($v:expr, $id:expr),)? $($a:ident $($b:expr)?),*;)+) => { prule![$($(#($f, $o),)? $(%($v, $id),)? $($a $($b)?),+);+] };
1393        ($($(#($f:expr, $o:expr),)? $(%($v:expr, $id:expr),)? $($a:ident $($b:expr)?),*);*) => { std::vec![$($crate::alt![$(#($f,$o),)? $(%($v, $id),)? $($a $($b)?),+]),*]};
1394    }
1395}