Skip to main content

lexigram_core/
alt.rs

1// Copyright (c) 2025 Redglyph (@gmail.com). All Rights Reserved.
2
3use std::collections::{HashMap, HashSet};
4use std::ops::{Deref, DerefMut};
5use crate::fixed_sym_table::SymInfoTable;
6use crate::parser::Symbol;
7use crate::{AltId, CollectJoin, VarId};
8
9// ---------------------------------------------------------------------------------------------
10
11// easier to use than an enum
12pub mod ruleflag {
13    /// Star or Plus repeat child alternative.
14    /// Set by `RuleTreeSet<General>::normalize_plus_or_star()` in `flags`.
15    pub const CHILD_REPEAT: u32 = 1;
16    /// Right-recursive child NT.
17    /// Set by `ProdRuleSet<T>::remove_left_recursion()` in `flags`.
18    pub const R_RECURSION: u32 = 2;
19    /// Left-recursive child NT.
20    /// Set by `ProdRuleSet<T>::remove_left_recursion()` in `flags`.
21    pub const CHILD_L_RECURSION: u32 = 4;
22    /// Left-recursive, ambiguous child NT.
23    /// Set by `ProdRuleSet<T>::remove_left_recursion()` in `flags`.
24    pub const CHILD_AMBIGUITY: u32 = 8;
25    /// Child NT created to regroup the independent alts when transforming an ambiguous, recursive rule.
26    /// Set by `ProdRuleSet<T>::remove_left_recursion()` in `flags`.
27    pub const CHILD_INDEPENDENT_AMBIGUITY: u32 = 16;
28    /// Left-factorized parent NT.
29    /// Set by `ProdRuleSet<T>::left_factorize()` in `flags`.
30    pub const PARENT_L_FACTOR: u32 = 32;
31    /// Left-factorized child NT.
32    /// Set by `ProdRuleSet<T>::left_factorize()` in `flags`.
33    pub const CHILD_L_FACT: u32 = 64;
34    /// Low-latency non-terminal alternative, used with `CHILD_REPEAT` or `R_RECURSION`.
35    /// Set by `ProdRuleSet<General>::build_from(rules: BuildFrom<RuleTreeSet<Normalized>>` in `flags`.
36    pub const L_FORM: u32 = 128;
37    /// Right-associative alternative.
38    /// Set by `ProdRuleSet<General>::build_from(rules: BuildFrom<RuleTreeSet<Normalized>>` in alts.
39    pub const R_ASSOC: u32 = 256;
40    /// Left-recursive parent NT.
41    /// Set by `ProdRuleSet<T>::remove_left_recursion()` in `flags`.
42    pub const PARENT_L_RECURSION: u32 = 512;
43    /// Left-recursive, ambiguous parent NT.
44    /// Set by `ProdRuleSet<T>::remove_left_recursion()` in `flags`.
45    pub const PARENT_AMBIGUITY: u32 = 1024;
46    /// Star or Plus repeat parent alternative.
47    /// Set by `RuleTreeSet<General>::normalize_plus_or_star()` in `flags`.
48    pub const PARENT_REPEAT: u32 = 2048;
49    /// CHILD_REPEAT and PARENT_REPEAT is +, not * (used with both flags)
50    pub const REPEAT_PLUS: u32 = 4096;
51    /// GREEDY alternative: is expected to generate an ambiguity in the parsing table
52    pub const GREEDY: u32 = 8192;
53    /// Precedence identical to previous alternative (only valid for binary left-/right-associative)
54    pub const PREC_EQ: u32 = 16384;
55    /// List of token-separated items: α (β α)*, where β only contains fixed terminals and α contains at least one value
56    pub const SEP_LIST: u32 = 32768;
57
58    pub const TRANSF_PARENT: u32 = /*R_RECURSION |*/ PARENT_L_FACTOR | PARENT_L_RECURSION | PARENT_AMBIGUITY | PARENT_REPEAT;
59    pub const TRANSF_CHILD: u32 = CHILD_REPEAT | CHILD_L_RECURSION | CHILD_AMBIGUITY | CHILD_L_FACT;
60    pub const TRANSF_CHILD_AMB: u32 = CHILD_AMBIGUITY | R_RECURSION | L_FORM;
61    pub const ALTERNATIVE_INFO: u32 = L_FORM | R_ASSOC | GREEDY | PREC_EQ;
62    pub const L_RECURSION: u32 = PARENT_L_RECURSION | CHILD_L_RECURSION;
63    pub const CHILD_REPEAT_LFORM: u32 = CHILD_REPEAT | L_FORM;
64
65
66    pub fn to_string(flags: u32) -> Vec<String> {
67        static NAMES: [(u32, &str); 16] = [
68            (CHILD_REPEAT               , "child_+_or_*"),
69            (R_RECURSION                , "right_rec"),
70            (CHILD_L_RECURSION          , "child_left_rec"),
71            (CHILD_AMBIGUITY            , "child_amb"),
72            (CHILD_INDEPENDENT_AMBIGUITY, "child_ind_amb"),
73            (PARENT_L_FACTOR            , "parent_left_fact"),
74            (CHILD_L_FACT, "child_left_fact"),
75            (L_FORM                     , "L-form"),
76            (R_ASSOC                    , "R-assoc"),
77            (PARENT_L_RECURSION         , "parent_left_rec"),
78            (PARENT_AMBIGUITY           , "parent_amb"),
79            (PARENT_REPEAT              , "parent_+_or_*"),
80            (REPEAT_PLUS                , "plus"),
81            (GREEDY                     , "greedy"),
82            (PREC_EQ                    , "prec_eq"),
83            (SEP_LIST                   , "sep_list"),
84        ];
85        NAMES.iter().filter_map(|(f, t)| if flags & f != 0 { Some(t.to_string()) } else { None } ).collect()
86    }
87
88    pub fn alt_info_to_string(mut flags: u32) -> Vec<String> {
89        static NAMES: [(u32, &str); 4] = [(L_FORM, "L"), (R_ASSOC, "R"), (GREEDY, "G"), (PREC_EQ, "P")];
90        let v: Vec<String> = NAMES.iter().filter_map(|(f, t)|
91            if flags & f != 0 {
92                flags &= !f;
93                Some(t.to_string())
94            } else {
95                None
96            }).collect();
97        v
98    }
99}
100
101// ---------------------------------------------------------------------------------------------
102
103pub fn alt_to_str<T: SymInfoTable>(f: &[Symbol], symbol_table: Option<&T>) -> String {
104    if f.is_empty() {
105        "<empty>".to_string()
106    } else {
107        f.iter().map(|s| s.to_str_quote(symbol_table)).join(" ")
108    }
109}
110
111pub fn alt_to_rule_str<T: SymInfoTable>(nt: VarId, f: &[Symbol], symbol_table: Option<&T>) -> String {
112    format!("{} -> {}", Symbol::NT(nt).to_str(symbol_table), alt_to_str(f, symbol_table))
113}
114
115/// Stores a production alternative (or alternative body): `A a` or `B` in `A -> A a | B`.
116///
117/// The [`Alternative`] type behaves like a `Vec<Symbol>` (`Deref` / `DerefMut`), and must be
118/// created with [`Alternative::new`].
119#[derive(Clone, Eq, PartialOrd, Ord, Debug)]
120pub struct Alternative {
121    pub v: Vec<Symbol>,
122    pub flags: u32,          // only for GREEDY, L_FORM and R_ASSOC
123    pub ambig_alt_id: Option<AltId>,
124    pub origin: Option<(VarId, usize)>,
125}
126
127impl Alternative {
128    pub fn new(v: Vec<Symbol>) -> Self {
129        Alternative { v, flags: 0, ambig_alt_id: None, origin: None }
130    }
131
132    pub fn with_flags(mut self, flags: u32) -> Self {
133        self.flags = flags;
134        self
135    }
136
137    pub fn with_ambig_alt_id(mut self, ambig_alt_id: AltId) -> Self {
138        self.ambig_alt_id = Some(ambig_alt_id);
139        self
140    }
141
142    pub fn with_origin(mut self, original_var: VarId, original_index: usize) -> Self {
143        self.origin = Some((original_var, original_index));
144        self
145    }
146
147    pub fn symbols(&self) -> &Vec<Symbol> {
148        &self.v
149    }
150
151    pub fn get_ambig_alt_id(&self) -> Option<AltId> {
152        self.ambig_alt_id
153    }
154
155    pub fn get_origin(&self) -> Option<(VarId, usize)> {
156        self.origin
157    }
158
159    pub fn get_flags(&self) -> u32 {
160        self.flags
161    }
162
163    pub fn to_str<T: SymInfoTable>(&self, symbol_table: Option<&T>) -> String {
164        let mut s = if self.flags & ruleflag::ALTERNATIVE_INFO != 0 {
165            format!("<{}> ", ruleflag::alt_info_to_string(self.flags).join(","))
166        } else {
167            String::new()
168        };
169        s.push_str(&alt_to_str(&self.v, symbol_table));
170        s
171    }
172
173    pub fn to_rule_str<T: SymInfoTable>(&self, nt: VarId, symbol_table: Option<&T>, mut extra_flags: u32) -> String {
174        extra_flags = (extra_flags | self.flags) & ruleflag::ALTERNATIVE_INFO;
175        let s = if extra_flags != 0 {
176            format!("<{}> ", ruleflag::alt_info_to_string(extra_flags).join(","))
177        } else {
178            String::new()
179        };
180        format!("{} -> {s}{}", Symbol::NT(nt).to_str(symbol_table), alt_to_str(&self.v, symbol_table))
181    }
182
183    pub fn to_macro_item(&self) -> String {
184        let mut src = match (self.flags, self.ambig_alt_id, self.origin) {
185            (0, None, None) => String::new(),
186            (f, None, None) => format!("#{f}, "),
187            (f, Some(o), None) => format!("#({f}, {o}), "),
188            (0, None, Some((v, id))) => format!("%({v}, {id}), "),
189            (f, None, Some((v, id))) => format!("#{f}, %({v}, {id}), "),
190            (f, Some(o), Some((v, id))) => format!("#({f}, {o}), %({v}, {id}), "),
191        };
192        src.push_str(&self.v.iter().map(|s| s.to_macro_item()).join(", "));
193        src
194    }
195
196    pub fn to_macro(&self) -> String {
197        format!("alt!({})", self.to_macro_item())
198    }
199
200    pub fn is_sym_empty(&self) -> bool {
201        self.v.len() == 1 && self.v[0] == Symbol::Empty
202    }
203
204    pub fn calc_alt_first(&self, first: &HashMap<Symbol, HashSet<Symbol>>) -> HashSet<Symbol> {
205        let mut new = HashSet::<Symbol>::new();
206        new.extend(first[&self.v[0]].iter().filter(|s| *s != &Symbol::Empty));
207        let mut trail = true;
208        for i in 0..self.v.len() - 1 {
209            let sym_i = &self.v[i];
210            if first[sym_i].contains(&Symbol::Empty) {
211                new.extend(first[&self.v[i + 1]].iter().filter(|s| *s != &Symbol::Empty));
212            } else {
213                trail = false;
214                break;
215            }
216        }
217        if trail && first[self.last().unwrap()].contains(&Symbol::Empty) {
218            new.insert(Symbol::Empty);
219        }
220        new
221    }
222
223    pub fn is_greedy(&self) -> bool {
224        self.flags & ruleflag::GREEDY != 0
225    }
226}
227
228// we use only `v` (the string of symbols) and `flags` the equality test
229impl PartialEq for Alternative {
230    fn eq(&self, other: &Self) -> bool {
231        self.flags == other.flags && self.v == other.v
232    }
233}
234
235impl Deref for Alternative {
236    type Target = Vec<Symbol>;
237
238    fn deref(&self) -> &Self::Target {
239        &self.v
240    }
241}
242
243impl DerefMut for Alternative {
244    fn deref_mut(&mut self) -> &mut Self::Target {
245        &mut self.v
246    }
247}