rustemo_compiler/grammar/
mod.rs

1use std::{
2    cell::Cell,
3    collections::BTreeMap,
4    fmt::Display,
5    hash::{Hash, Hasher},
6    str::FromStr,
7};
8
9use rustemo::{Error, Parser, Result};
10
11use crate::{
12    index::{
13        NonTermIndex, NonTermVec, ProdIndex, ProdVec, SymbolIndex, SymbolVec, TermIndex, TermVec,
14    },
15    lang::{rustemo::RustemoParser, rustemo_actions::Name},
16};
17
18use self::builder::GrammarBuilder;
19
20use super::lang::rustemo_actions::{
21    GrammarSymbol, Imports, ProdMetaDatas, Recognizer, TermMetaDatas,
22};
23
24pub(crate) mod builder;
25#[cfg(test)]
26mod tests;
27pub(crate) mod types;
28
29#[derive(Debug)]
30pub struct Grammar {
31    pub imports: Imports,
32    pub productions: ProdVec<Production>,
33
34    pub terminals: TermVec<Terminal>,
35    pub nonterminals: NonTermVec<NonTerminal>,
36    pub nonterm_by_name: BTreeMap<String, SymbolIndex>,
37    pub term_by_name: BTreeMap<String, SymbolIndex>,
38    /// Index of EMPTY symbol
39    pub empty_index: SymbolIndex,
40    /// Index of STOP symbol
41    pub stop_index: SymbolIndex,
42    /// Index of grammar augmented symbol
43    pub augmented_index: SymbolIndex,
44    /// Index of augmented symbol for Layout rule if given
45    pub augmented_layout_index: Option<SymbolIndex>,
46    /// An index of the start symbol. First non-terminal or terminal of the grammar.
47    pub start_index: SymbolIndex,
48}
49
50macro_rules! grammar_elem {
51    ($name:ident) => {
52        impl PartialEq for $name {
53            fn eq(&self, other: &Self) -> bool {
54                self.idx == other.idx
55            }
56        }
57        impl Eq for $name {}
58        impl Hash for $name {
59            fn hash<H: Hasher>(&self, state: &mut H) {
60                self.idx.hash(state);
61            }
62        }
63
64        impl PartialOrd for $name {
65            fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
66                Some(self.cmp(other))
67            }
68        }
69
70        impl Ord for $name {
71            fn cmp(&self, other: &Self) -> std::cmp::Ordering {
72                self.idx.cmp(&other.idx)
73            }
74        }
75    };
76}
77
78#[derive(Debug, Default)]
79pub struct Terminal {
80    pub idx: TermIndex,
81    pub name: String,
82    pub annotation: Option<String>,
83    pub recognizer: Option<Recognizer>,
84
85    /// Terminal will carry content if it is a non-constant match (e.g. a regex
86    /// or a custom recognizer).
87    pub has_content: bool,
88
89    /// Is this terminal reachable from the start rule.
90    /// Used to determine layout-only rules.
91    pub reachable: Cell<bool>,
92
93    /// Priority used to decide conflict resolutions
94    pub prio: Priority,
95
96    /// Associativity used to decide shift/reduce conflict resolutions
97    pub assoc: Associativity,
98
99    pub meta: TermMetaDatas,
100}
101grammar_elem!(Terminal);
102
103#[derive(Debug, Default)]
104pub struct NonTerminal {
105    pub idx: NonTermIndex,
106    pub name: String,
107    pub annotation: Option<String>,
108    pub productions: Vec<ProdIndex>,
109
110    /// Is this non-terminal reachable from the start rule.
111    /// Used to determine layout-only rules.
112    pub reachable: Cell<bool>,
113}
114grammar_elem!(NonTerminal);
115
116impl NonTerminal {
117    #[inline]
118    pub fn productions<'a>(&self, grammar: &'a Grammar) -> Vec<&'a Production> {
119        self.productions
120            .iter()
121            .map(|&idx| &grammar.productions[idx])
122            .collect()
123    }
124}
125
126impl Display for Grammar {
127    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
128        writeln!(f, "\nGRAMMAR [")?;
129        writeln!(f, "\nTerminals:")?;
130        for terminal in &self.terminals {
131            writeln!(f, "{}. {}", terminal.idx, terminal.name)?;
132        }
133        writeln!(f, "\nNonTerminals:")?;
134        for nonterminal in &self.nonterminals {
135            writeln!(
136                f,
137                "{} ({}). {}",
138                nonterminal.idx,
139                self.nonterm_to_symbol_index(nonterminal.idx),
140                nonterminal.name
141            )?;
142        }
143        writeln!(f, "\nProductions:")?;
144        for production in &self.productions {
145            write!(
146                f,
147                "{}. {}: ",
148                production.idx, self.nonterminals[production.nonterminal].name
149            )?;
150            for assignment in &production.rhs {
151                write!(f, "{} ", self.symbol_name(res_symbol(assignment)))?;
152            }
153            writeln!(f)?;
154        }
155
156        writeln!(f, "\n] GRAMMAR")
157    }
158}
159
160#[derive(Debug, Default, PartialEq, Eq)]
161pub enum Associativity {
162    #[default]
163    None,
164    Left,
165    Right,
166}
167
168pub type Priority = u32;
169pub const DEFAULT_PRIORITY: u32 = 10;
170
171#[derive(Debug)]
172pub struct Production {
173    pub idx: ProdIndex,
174    pub nonterminal: NonTermIndex,
175    pub ntidx: usize,
176    pub kind: Option<String>,
177    pub rhs: Vec<ResolvingAssignment>,
178    pub assoc: Associativity,
179    pub prio: Priority,
180    pub dynamic: bool,
181    pub nops: bool,
182    pub nopse: bool,
183    pub meta: ProdMetaDatas,
184}
185grammar_elem!(Production);
186
187impl Default for Production {
188    fn default() -> Self {
189        Self {
190            // These two should always be given.
191            idx: ProdIndex(usize::MAX),
192            nonterminal: NonTermIndex(usize::MAX),
193
194            ntidx: 0,
195            kind: None,
196            rhs: Default::default(),
197            assoc: Default::default(),
198            prio: DEFAULT_PRIORITY,
199            dynamic: Default::default(),
200            nops: Default::default(),
201            nopse: Default::default(),
202            meta: Default::default(),
203        }
204    }
205}
206
207impl Production {
208    #[inline]
209    pub fn rhs_symbols(&self) -> Vec<SymbolIndex> {
210        self.rhs.iter().map(res_symbol).collect()
211    }
212
213    /// Returns resolved RHS assignments
214    #[inline]
215    pub fn rhs_assign(&self) -> Vec<Assignment> {
216        self.rhs
217            .iter()
218            .enumerate()
219            .map(|(idx, a)| Assignment {
220                name: a.name.clone(),
221                symbol: res_symbol(a),
222                is_bool: a.is_bool,
223                idx,
224            })
225            .collect()
226    }
227
228    /// Returns RHS assignment which has some content (i.e. non-terminals and
229    /// non-constant terminals).
230    pub fn rhs_with_content(&self, grammar: &Grammar) -> Vec<Assignment> {
231        self.rhs_assign()
232            .into_iter()
233            .filter(|a| grammar.symbol_has_content(a.symbol))
234            .collect::<Vec<_>>()
235    }
236
237    #[inline]
238    pub fn rhs_symbol(&self, pos: usize) -> SymbolIndex {
239        res_symbol(&self.rhs[pos])
240    }
241
242    #[inline]
243    pub fn nonterminal<'a>(&self, grammar: &'a Grammar) -> &'a NonTerminal {
244        &grammar.nonterminals[self.nonterminal]
245    }
246
247    pub fn to_string(&self, grammar: &Grammar) -> String {
248        format!(
249            "{}: {}",
250            self.nonterminal(grammar).name,
251            grammar.symbol_names(self.rhs_symbols()).join(" ")
252        )
253    }
254}
255
256impl Display for Production {
257    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
258        write!(f, "{}:", self.idx)?;
259        for assign in &self.rhs {
260            write!(f, " ")?;
261            if assign.name.is_some() {
262                write!(f, "{}=", assign.name.as_ref().unwrap())?;
263            }
264            write!(
265                f,
266                "{}",
267                match &assign.symbol.symbol {
268                    GrammarSymbol::Name(name) => name.as_ref().into(),
269                    GrammarSymbol::StrConst(mtch) => {
270                        format!("\"{}\"", mtch.as_ref())
271                    }
272                }
273            )?;
274        }
275        Ok(())
276    }
277}
278
279#[derive(Debug)]
280pub struct ResolvingSymbolIndex {
281    index: Option<SymbolIndex>,
282    symbol: GrammarSymbol,
283}
284
285#[derive(Debug)]
286pub struct ResolvingAssignment {
287    pub name: Option<Name>,
288    pub symbol: ResolvingSymbolIndex,
289    pub is_bool: bool,
290}
291
292#[derive(Debug)]
293pub struct Assignment {
294    pub name: Option<Name>,
295    pub symbol: SymbolIndex,
296    /// If this assignment is ?= variant. RHS is true if Some.
297    pub is_bool: bool,
298    /// position in RHS, zero based.
299    pub idx: usize,
300}
301
302/// Called for Assignment to extract resolved SymbolIndex.
303#[inline]
304pub(crate) fn res_symbol(assign: &ResolvingAssignment) -> SymbolIndex {
305    assign
306        .symbol
307        .index
308        .unwrap_or_else(|| panic!("Unresolved symbol {:?}", &assign.symbol.symbol))
309}
310
311// This can be used at the moment due to conflict with a blankt impl in the core.
312// See: https://github.com/rust-lang/rust/issues/50133
313// impl<T: AsRef<str>> TryFrom<T> for Grammar {
314//     type Error = Error;
315
316//     fn try_from(value: T) -> std::result::Result<Self, Self::Error> {
317//        Grammar::from_string(value.as_ref())
318//     }
319// }
320
321impl FromStr for Grammar {
322    type Err = Error;
323
324    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
325        Grammar::from_string(s)
326    }
327}
328
329impl Grammar {
330    /// Parses given string and constructs a Grammar instance
331    fn from_string<G: AsRef<str>>(grammar_str: G) -> Result<Self> {
332        GrammarBuilder::new()
333            .try_from_file(RustemoParser::new().parse(grammar_str.as_ref())?, None)
334    }
335
336    // /// Parses given file and constructs a Grammar instance
337    // /// FIXME: Return/move owned string from file content.
338    // pub fn from_file<F: AsRef<Path>>(file: F) -> Result<Self> {
339    //     use crate::rustemo_types::{NonTerminal, Symbol};
340    //     if let Symbol::NonTerminal(NonTerminal::PGFile(pgfile)) =
341    //         RustemoParser::parse_file(file)?
342    //     {
343    //         Ok(Self::from_pgfile(pgfile))
344    //     } else {
345    //         panic!("Invalid symbol from grammar parse!")
346    //     }
347    // }
348
349    pub(crate) fn new_termvec<T: Clone>(&self, default: T) -> TermVec<T> {
350        TermVec(vec![default; self.terminals.len()])
351    }
352
353    pub(crate) fn new_nontermvec<T: Clone>(&self, default: T) -> NonTermVec<T> {
354        NonTermVec(vec![default; self.nonterminals.len()])
355    }
356
357    pub fn symbol_index(&self, name: &str) -> SymbolIndex {
358        *self.term_by_name.get(name).unwrap_or_else(|| {
359            self.nonterm_by_name.get(name).unwrap_or_else(|| {
360                panic!("No Symbol by name {name:?}");
361            })
362        })
363    }
364
365    pub fn symbol_name(&self, index: SymbolIndex) -> String {
366        if index.0 < self.terminals.len() {
367            self.symbol_to_term(index).name.clone()
368        } else {
369            self.symbol_to_nonterm(index).name.clone()
370        }
371    }
372
373    /// If this symbol is either a non-terminal of a terminal with a content.
374    /// I.e. not a constant match terminal (keyword, punctuation...)
375    #[inline]
376    pub fn symbol_has_content(&self, symbol: SymbolIndex) -> bool {
377        !self.is_empty(symbol)
378            && (self.is_nonterm(symbol) || self.symbol_to_term(symbol).has_content)
379    }
380
381    pub fn symbol_indexes(&self, names: &[&str]) -> SymbolVec<SymbolIndex> {
382        let mut indexes = SymbolVec::new();
383        for name in names {
384            indexes.push(self.symbol_index(name))
385        }
386        indexes
387    }
388
389    #[inline]
390    pub fn symbol_names<T>(&self, indexes: T) -> Vec<String>
391    where
392        T: IntoIterator<Item = SymbolIndex>,
393    {
394        indexes.into_iter().map(|i| self.symbol_name(i)).collect()
395    }
396
397    #[inline]
398    pub fn term_to_symbol_index(&self, index: TermIndex) -> SymbolIndex {
399        SymbolIndex(index.0)
400    }
401
402    /// Convert symbol index to terminal index.
403    #[inline]
404    pub fn symbol_to_term_index(&self, index: SymbolIndex) -> TermIndex {
405        TermIndex(index.0)
406    }
407
408    /// Convert symbol index to terminal
409    #[inline]
410    pub fn symbol_to_term(&self, index: SymbolIndex) -> &Terminal {
411        &self.terminals[self.symbol_to_term_index(index)]
412    }
413
414    /// Get Terminal by name.
415    #[inline]
416    pub fn term_by_name(&self, name: &str) -> &Terminal {
417        self.symbol_to_term(self.symbol_index(name))
418    }
419
420    /// Get Terminal by index.
421    #[inline]
422    pub fn term_by_index(&self, index: TermIndex) -> &Terminal {
423        self.symbol_to_term(self.term_to_symbol_index(index))
424    }
425
426    #[inline]
427    pub fn nonterm_to_symbol_index(&self, index: NonTermIndex) -> SymbolIndex {
428        SymbolIndex(index.0 + self.terminals.len())
429    }
430
431    /// Convert symbol index to non-terminal index. Panics if symbol index is a
432    /// terminal index.
433    #[inline]
434    pub fn symbol_to_nonterm_index(&self, index: SymbolIndex) -> NonTermIndex {
435        NonTermIndex(index.0.checked_sub(self.terminals.len()).unwrap())
436    }
437
438    /// Convert symbol index to non-terminal. Panics if symbol index is a
439    /// terminal index.
440    #[inline]
441    pub fn symbol_to_nonterm(&self, index: SymbolIndex) -> &NonTerminal {
442        &self.nonterminals[NonTermIndex(index.0.checked_sub(self.terminals.len()).unwrap())]
443    }
444
445    /// Get NonTerminal by name.
446    #[inline]
447    pub fn nonterm_by_name(&self, name: &str) -> &NonTerminal {
448        self.symbol_to_nonterm(self.symbol_index(name))
449    }
450
451    /// Get NonTerminal by index.
452    #[inline]
453    pub fn nonterm_by_index(&self, index: NonTermIndex) -> &NonTerminal {
454        self.symbol_to_nonterm(self.nonterm_to_symbol_index(index))
455    }
456
457    #[inline]
458    pub fn is_nonterm(&self, index: SymbolIndex) -> bool {
459        index.0 >= self.terminals.len()
460    }
461
462    #[inline]
463    pub fn is_term(&self, index: SymbolIndex) -> bool {
464        index.0 < self.terminals.len()
465    }
466
467    #[inline]
468    pub fn is_empty(&self, index: SymbolIndex) -> bool {
469        index == self.empty_index
470    }
471
472    #[inline]
473    pub fn production_len(&self, prod: ProdIndex) -> usize {
474        self.productions[prod].rhs.len()
475    }
476
477    #[inline]
478    pub fn production_rhs_symbols(&self, prod: ProdIndex) -> Vec<SymbolIndex> {
479        self.productions[prod].rhs.iter().map(res_symbol).collect()
480    }
481
482    /// Returns all productions except special AUG and AUGL.
483    pub fn productions(&self) -> Vec<&Production> {
484        self.productions
485            .iter()
486            .filter(|&p| {
487                let nt_symbol = self.nonterm_to_symbol_index(p.nonterminal);
488                nt_symbol != self.augmented_index && self.augmented_layout_index != Some(nt_symbol)
489            })
490            .collect()
491    }
492
493    /// Returns all nonterminals except special EMPTY, AUG and AUGL.
494    pub fn nonterminals(&self) -> Vec<&NonTerminal> {
495        self.nonterminals
496            .iter()
497            .filter(|&n| {
498                let nt_symbol = self.nonterm_to_symbol_index(n.idx);
499                nt_symbol != self.empty_index
500                    && nt_symbol != self.augmented_index
501                    && self.augmented_layout_index != Some(nt_symbol)
502            })
503            .collect()
504    }
505
506    #[inline]
507    pub fn is_enum(&self, nonterminal: &NonTerminal) -> bool {
508        let prods = nonterminal.productions(self);
509        prods.iter().filter(|x| x.rhs.len() == 1).count() == prods.len()
510    }
511
512    #[inline]
513    pub fn has_layout(&self) -> bool {
514        self.augmented_layout_index.is_some()
515    }
516}