lexigram_core/
alt.rs

1// Copyright (c) 2025 Redglyph (@gmail.com). All Rights Reserved.
2
3use std::collections::{HashMap, HashSet};
4use std::ops::{Deref, DerefMut};
5use crate::fixed_sym_table::SymInfoTable;
6use crate::parser::Symbol;
7use crate::{AltId, CollectJoin, VarId};
8
9// ---------------------------------------------------------------------------------------------
10
/// Bit flags attached to rules and alternatives.
///
/// Plain `u32` constants are used instead of an enum because they are easier to combine
/// and to test with bitwise operators.
pub mod ruleflag {
    /// Star or Plus repeat child alternative.
    /// Set by `RuleTreeSet<General>::normalize_plus_or_star()` in `flags`.
    pub const CHILD_REPEAT: u32 = 1;
    /// Right-recursive child NT.
    /// Set by `ProdRuleSet<T>::remove_left_recursion()` in `flags`.
    pub const R_RECURSION: u32 = 2;
    /// Left-recursive child NT.
    /// Set by `ProdRuleSet<T>::remove_left_recursion()` in `flags`.
    pub const CHILD_L_RECURSION: u32 = 4;
    /// Left-recursive, ambiguous child NT.
    /// Set by `ProdRuleSet<T>::remove_left_recursion()` in `flags`.
    pub const CHILD_AMBIGUITY: u32 = 8;
    /// Child NT created to regroup the independent alts when transforming an ambiguous, recursive rule.
    /// Set by `ProdRuleSet<T>::remove_left_recursion()` in `flags`.
    pub const CHILD_INDEPENDENT_AMBIGUITY: u32 = 16;
    /// Left-factorized parent NT.
    /// Set by `ProdRuleSet<T>::left_factorize()` in `flags`.
    pub const PARENT_L_FACTOR: u32 = 32;
    /// Left-factorized child NT.
    /// Set by `ProdRuleSet<T>::left_factorize()` in `flags`.
    pub const CHILD_L_FACT: u32 = 64;
    /// Low-latency non-terminal alternative, used with `CHILD_REPEAT` or `R_RECURSION`.
    /// Set by `ProdRuleSet<General>::build_from(rules: BuildFrom<RuleTreeSet<Normalized>>)` in `flags`.
    pub const L_FORM: u32 = 128;
    /// Right-associative alternative.
    /// Set by `ProdRuleSet<General>::build_from(rules: BuildFrom<RuleTreeSet<Normalized>>)` in alts.
    pub const R_ASSOC: u32 = 256;
    /// Left-recursive parent NT.
    /// Set by `ProdRuleSet<T>::remove_left_recursion()` in `flags`.
    pub const PARENT_L_RECURSION: u32 = 512;
    /// Left-recursive, ambiguous parent NT.
    /// Set by `ProdRuleSet<T>::remove_left_recursion()` in `flags`.
    pub const PARENT_AMBIGUITY: u32 = 1024;
    /// Star or Plus repeat parent alternative.
    /// Set by `RuleTreeSet<General>::normalize_plus_or_star()` in `flags`.
    pub const PARENT_REPEAT: u32 = 2048;
    /// The repetition marked by `CHILD_REPEAT` / `PARENT_REPEAT` is `+`, not `*` (used with both flags).
    pub const REPEAT_PLUS: u32 = 4096;
    /// GREEDY alternative: is expected to generate an ambiguity in the parsing table.
    pub const GREEDY: u32 = 8192;
    /// Precedence identical to previous alternative (only valid for binary left-/right-associative).
    pub const PREC_EQ: u32 = 16384;

    /// Mask of the parent-side transformation flags (left factorization, left recursion, ambiguity, repeat).
    pub const TRANSF_PARENT: u32 = /*R_RECURSION |*/ PARENT_L_FACTOR | PARENT_L_RECURSION | PARENT_AMBIGUITY | PARENT_REPEAT;
    /// Mask of the child-side transformation flags (repeat, left recursion, ambiguity, left factorization).
    pub const TRANSF_CHILD: u32 = CHILD_REPEAT | CHILD_L_RECURSION | CHILD_AMBIGUITY | CHILD_L_FACT;
    /// Mask combining `CHILD_AMBIGUITY`, `R_RECURSION` and `L_FORM`.
    pub const TRANSF_CHILD_AMB: u32 = CHILD_AMBIGUITY | R_RECURSION | L_FORM;
    /// Mask of the flags that describe an alternative; these are the ones rendered by [`alt_info_to_string`].
    pub const ALTERNATIVE_INFO: u32 = L_FORM | R_ASSOC | GREEDY | PREC_EQ;
    /// Mask of both left-recursion flags (parent and child).
    pub const L_RECURSION: u32 = PARENT_L_RECURSION | CHILD_L_RECURSION;

    /// Returns the long names of all the flags set in `flags`.
    ///
    /// The names follow the order of the `NAMES` table, i.e. ascending bit value.
    /// Bits that don't correspond to any known flag are ignored.
    pub fn to_string(flags: u32) -> Vec<String> {
        static NAMES: [(u32, &str); 15] = [
            (CHILD_REPEAT               , "child_+_or_*"),
            (R_RECURSION                , "right_rec"),
            (CHILD_L_RECURSION          , "child_left_rec"),
            (CHILD_AMBIGUITY            , "child_amb"),
            (CHILD_INDEPENDENT_AMBIGUITY, "child_ind_amb"),
            (PARENT_L_FACTOR            , "parent_left_fact"),
            (CHILD_L_FACT               , "child_left_fact"),
            (L_FORM                     , "L-form"),
            (R_ASSOC                    , "R-assoc"),
            (PARENT_L_RECURSION         , "parent_left_rec"),
            (PARENT_AMBIGUITY           , "parent_amb"),
            (PARENT_REPEAT              , "parent_+_or_*"),
            (REPEAT_PLUS                , "plus"),
            (GREEDY                     , "greedy"),
            (PREC_EQ                    , "prec_eq"),
        ];
        NAMES.iter()
            .filter(|(flag, _)| flags & flag != 0)
            .map(|(_, name)| name.to_string())
            .collect()
    }

    /// Returns the one-letter codes of the [`ALTERNATIVE_INFO`] flags set in `flags`:
    /// `"L"` (`L_FORM`), `"R"` (`R_ASSOC`), `"G"` (`GREEDY`) and `"P"` (`PREC_EQ`), in that order.
    ///
    /// Any other bit of `flags` is ignored.
    pub fn alt_info_to_string(flags: u32) -> Vec<String> {
        static NAMES: [(u32, &str); 4] = [(L_FORM, "L"), (R_ASSOC, "R"), (GREEDY, "G"), (PREC_EQ, "P")];
        // `flags` is only read: clearing the bits that were already reported served no purpose
        NAMES.iter()
            .filter(|(flag, _)| flags & flag != 0)
            .map(|(_, code)| code.to_string())
            .collect()
    }
}
95
96// ---------------------------------------------------------------------------------------------
97
98pub fn alt_to_str<T: SymInfoTable>(f: &Vec<Symbol>, symbol_table: Option<&T>) -> String {
99    if f.is_empty() {
100        "<empty>".to_string()
101    } else {
102        f.iter().map(|s| s.to_str_quote(symbol_table)).join(" ")
103    }
104}
105
106pub fn alt_to_rule_str<T: SymInfoTable>(nt: VarId, f: &Vec<Symbol>, symbol_table: Option<&T>) -> String {
107    format!("{} -> {}", Symbol::NT(nt).to_str(symbol_table), alt_to_str(f, symbol_table))
108}
109
110/// Stores a production alternative (or alternative body): `A a` or `B` in `A -> A a | B`.
111///
112/// The [`Alternative`] type behaves like a `Vec<Symbol>` (`Deref` / `DerefMut`), and must be
113/// created with [`Alternative::new`].
114#[derive(Clone, Eq, PartialOrd, Ord, Debug)]
115pub struct Alternative {
116    pub v: Vec<Symbol>,
117    pub flags: u32,          // only for GREEDY, L_FORM and R_ASSOC
118    pub ambig_alt_id: Option<AltId>,
119    pub origin: Option<(VarId, usize)>,
120}
121
122impl Alternative {
123    pub fn new(v: Vec<Symbol>) -> Self {
124        Alternative { v, flags: 0, ambig_alt_id: None, origin: None }
125    }
126
127    pub fn with_flags(mut self, flags: u32) -> Self {
128        self.flags = flags;
129        self
130    }
131
132    pub fn with_ambig_alt_id(mut self, ambig_alt_id: AltId) -> Self {
133        self.ambig_alt_id = Some(ambig_alt_id);
134        self
135    }
136
137    pub fn with_origin(mut self, original_var: VarId, original_index: usize) -> Self {
138        self.origin = Some((original_var, original_index));
139        self
140    }
141
142    pub fn symbols(&self) -> &Vec<Symbol> {
143        &self.v
144    }
145
146    pub fn get_ambig_alt_id(&self) -> Option<AltId> {
147        self.ambig_alt_id
148    }
149
150    pub fn get_origin(&self) -> Option<(VarId, usize)> {
151        self.origin
152    }
153
154    pub fn get_flags(&self) -> u32 {
155        self.flags
156    }
157
158    pub fn to_str<T: SymInfoTable>(&self, symbol_table: Option<&T>) -> String {
159        let mut s = if self.flags & ruleflag::ALTERNATIVE_INFO != 0 {
160            format!("<{}> ", ruleflag::alt_info_to_string(self.flags).join(","))
161        } else {
162            String::new()
163        };
164        s.push_str(&alt_to_str(&self.v, symbol_table));
165        s
166    }
167
168    pub fn to_rule_str<T: SymInfoTable>(&self, nt: VarId, symbol_table: Option<&T>, mut extra_flags: u32) -> String {
169        extra_flags = (extra_flags | self.flags) & ruleflag::ALTERNATIVE_INFO;
170        let s = if extra_flags != 0 {
171            format!("<{}> ", ruleflag::alt_info_to_string(extra_flags).join(","))
172        } else {
173            String::new()
174        };
175        format!("{} -> {s}{}", Symbol::NT(nt).to_str(symbol_table), alt_to_str(&self.v, symbol_table))
176    }
177
178    pub fn to_macro_item(&self) -> String {
179        let mut src = match (self.flags, self.ambig_alt_id, self.origin) {
180            (0, None, None) => String::new(),
181            (f, None, None) => format!("#{f}, "),
182            (f, Some(o), None) => format!("#({f}, {o}), "),
183            (0, None, Some((v, id))) => format!("%({v}, {id}), "),
184            (f, None, Some((v, id))) => format!("#{f}, %({v}, {id}), "),
185            (f, Some(o), Some((v, id))) => format!("#({f}, {o}), %({v}, {id}), "),
186        };
187        src.push_str(&self.v.iter().map(|s| s.to_macro_item()).join(", "));
188        src
189    }
190
191    pub fn to_macro(&self) -> String {
192        format!("alt!({})", self.to_macro_item())
193    }
194
195    pub fn is_sym_empty(&self) -> bool {
196        self.v.len() == 1 && self.v[0] == Symbol::Empty
197    }
198
199    pub fn calc_alt_first(&self, first: &HashMap<Symbol, HashSet<Symbol>>) -> HashSet<Symbol> {
200        let mut new = HashSet::<Symbol>::new();
201        new.extend(first[&self.v[0]].iter().filter(|s| *s != &Symbol::Empty));
202        let mut trail = true;
203        for i in 0..self.v.len() - 1 {
204            let sym_i = &self.v[i];
205            if first[sym_i].contains(&Symbol::Empty) {
206                new.extend(first[&self.v[i + 1]].iter().filter(|s| *s != &Symbol::Empty));
207            } else {
208                trail = false;
209                break;
210            }
211        }
212        if trail && first[self.last().unwrap()].contains(&Symbol::Empty) {
213            new.insert(Symbol::Empty);
214        }
215        new
216    }
217
218    pub fn is_greedy(&self) -> bool {
219        self.flags & ruleflag::GREEDY != 0
220    }
221}
222
223// we use only `v` (the string of symbols) and `flags` the equality test
224impl PartialEq for Alternative {
225    fn eq(&self, other: &Self) -> bool {
226        self.flags == other.flags && self.v == other.v
227    }
228}
229
230impl Deref for Alternative {
231    type Target = Vec<Symbol>;
232
233    fn deref(&self) -> &Self::Target {
234        &self.v
235    }
236}
237
238impl DerefMut for Alternative {
239    fn deref_mut(&mut self) -> &mut Self::Target {
240        &mut self.v
241    }
242}