cfg_grammar/
cfg.rs

1//! Definitions of the context-free grammar type and its rules.
2
3use std::borrow::Cow;
4use std::cell::RefCell;
5use std::fmt::Write;
6use std::rc::Rc;
7use std::{cmp, fmt};
8use std::{mem, ops};
9
10use cfg_history::earley::History;
11use cfg_symbol::SymbolName;
12
13use occurence_map::OccurenceMap;
14use rule_builder::RuleBuilder;
15
16use crate::local_prelude::*;
17use cfg_history::{
18    BinarizedRhsRange::*, HistoryNodeBinarize, HistoryNodeEliminateNulling, RootHistoryNode,
19};
20
21/// Context-free grammar type.
22///
23/// A context-free grammar can be though of as a regular expression
24/// equipped with recursion.
25#[derive(Clone, Debug)]
26pub struct Cfg {
27    /// The symbol source.
28    sym_source: SymbolSource,
29    /// The list of lexemes.
30    lexemes: SymbolBitSet,
31    /// The array of rules.
32    rules: Vec<CfgRule>,
33    /// Start symbols.
34    roots: MaybeSmallVec<Symbol>,
35    wrapped_roots: MaybeSmallVec<WrappedRoot>,
36    rhs_len_invariant: Option<usize>,
37    eliminate_nulling: bool,
38    tmp_stack: RefCell<Vec<Symbol>>,
39}
40
41/// Standard grammar rule representation.
42#[derive(Clone, Debug, Eq, PartialEq)]
43pub struct CfgRule {
44    /// The rule's left-hand side symbol.
45    pub lhs: Symbol,
46    /// The rule's right-hand side symbols.
47    pub rhs: Rc<[Symbol]>,
48    /// The rule's history.
49    pub history: History,
50}
51
52/// Standard grammar rule representation, including a name.
53/// Used for debugging in tests.
54#[derive(Clone)]
55pub struct NamedCfgRule {
56    /// The rule's left-hand side symbol.
57    pub lhs: Symbol,
58    /// The rule's right-hand side symbols.
59    pub rhs: Rc<[Symbol]>,
60    /// The rule's history.
61    ///
62    /// Carries information about grammar transformations
63    /// this rule went through.
64    pub history: Option<History>,
65    /// Collection of symbol names.
66    pub names: Vec<Option<SymbolName>>,
67}
68
69/// We have a method for adding "wrapped" roots in the form:
70/// `root ::= start_of_input ~ inner_root ~ end_of_input`.
71/// See [`fn wrap_input`].
72///
73/// [`fn wrap_input`]: Cfg::wrap_input
74#[derive(Clone, Copy, Debug)]
75pub struct WrappedRoot {
76    /// First symbol in the wrapping rule.
77    pub start_of_input: Symbol,
78    /// Second symbol in the wrapping rule.
79    pub inner_root: Symbol,
80    /// Third symbol in the wrapping rule.
81    pub end_of_input: Symbol,
82    /// The LHS of the wrapping rule.
83    pub root: Symbol,
84}
85
86/// Used only for [`fn rhs_closure`].
87///
88///
89///
90/// [`fn rhs_closure`]: Cfg::rhs_closure
91#[derive(Eq, PartialEq, Clone, Copy)]
92pub enum RhsPropertyMode {
93    /// If **all** symbols on the RHS have the property,
94    /// the LHS has it too.
95    All,
96    /// If **any** symbol on the RHS has the property,
97    /// the LHS has it too.
98    Any,
99}
100
101/// Exists only for [`fn column`].
102///
103/// [`fn column`]: Cfg::column
104#[derive(Clone, Copy, Debug)]
105pub struct DotInfo {
106    /// The LHS symbol for the grammar rule at the given row.
107    pub lhs: Symbol,
108    /// The pre-dot symbol, or `None` if our column 2 has a row with
109    /// only one symbol, meaning there is no pre-dot symbol.
110    pub predot: Option<Symbol>,
111    /// The post-dot symbol, or `None` if our column 1 has a row with
112    /// only one symbol, or we are at column 2, meaning there is no
113    /// post-dot symbol.
114    pub postdot: Option<Symbol>,
115    /// Semantics of the rule dot at the given column of rule dots.
116    pub earley: Option<earley::rule_dot::RuleDot>,
117}
118
119impl Default for Cfg {
120    fn default() -> Self {
121        Self::with_sym_source(SymbolSource::new())
122    }
123}
124
125impl Cfg {
126    /// Creates an empty context-free grammar.
127    #[inline]
128    pub fn new() -> Self {
129        Self::default()
130    }
131
132    /// Creates an empty context-free grammar with the given symbol source.
133    ///
134    /// Symbols will be generated with this symbol source.
135    pub fn with_sym_source(sym_source: SymbolSource) -> Self {
136        Cfg {
137            sym_source,
138            lexemes: SymbolBitSet::new(),
139            rules: vec![],
140            roots: MaybeSmallVec::new(),
141            wrapped_roots: MaybeSmallVec::new(),
142            rhs_len_invariant: None,
143            eliminate_nulling: false,
144            tmp_stack: RefCell::new(vec![]),
145        }
146    }
147
148    /// Returns generated symbols.
149    pub fn sym<const N: usize>(&mut self) -> [Symbol; N] {
150        self.sym_source_mut().sym()
151    }
152
153    /// Generates a new unique symbol.
154    ///
155    /// If a name is given, it will be recorded within the symbol
156    /// source.
157    pub fn next_sym(&mut self, name: Option<Cow<str>>) -> Symbol {
158        self.sym_source_mut().next_sym(name)
159    }
160
161    /// Generates a new unique symbol.
162    pub fn lexeme(&mut self, name: Option<Cow<str>>) -> Symbol {
163        self.lexemes.reserve(self.num_syms() + 1);
164        let result = self.sym_source_mut().next_sym(name);
165        self.lexemes.set(result, true);
166        result
167    }
168
169    // /// Generates
170    // pub fn sym_at<const N: usize>(at: usize) -> [Symbol; N] {
171    //     let mut sym_source = SymbolSource::new();
172    //     for _ in 0..at {
173    //         sym_source.next_sym(None);
174    //     }
175    //     sym_source.sym()
176    // }
177
178    /// Returns the number of symbols in use.
179    pub fn num_syms(&self) -> usize {
180        self.sym_source().num_syms()
181    }
182
183    /// Assings a new set of roots.
184    ///
185    /// A root may be also called a start symbol.
186    ///
187    /// Roots may be remapped with `Remap::remap_symbols`.
188    ///
189    /// This library needs to know the roots for operations
190    /// such as FOLLOW set calculation,  
191    pub fn set_roots(&mut self, roots: impl AsRef<[Symbol]>) {
192        self.roots = roots.as_ref().iter().copied().collect();
193    }
194
195    /// Returns the list of our previously assigned roots,
196    /// or empty if there were none assigned.
197    pub fn roots(&self) -> &[Symbol] {
198        &self.roots[..]
199    }
200
201    /// Returns the list of wrapped roots,
202    pub fn wrapped_roots(&self) -> &[WrappedRoot] {
203        &self.wrapped_roots[..]
204    }
205
206    /// Assigns a new set of wrapped roots.
207    ///
208    /// A wrapped roots may derive `start_of_input`, `root` and `end_of_input`.
209    pub fn set_wrapped_roots(&mut self, wrapped_roots: &[WrappedRoot]) {
210        self.wrapped_roots = wrapped_roots.into();
211    }
212
213    /// Determines whether there are assigned roots (whether there is a start
214    /// symbol).
215    pub fn has_roots(&self) -> bool {
216        !self.roots.is_empty()
217    }
218
219    /// Modifies this grammar to its weak equivalent.
220    ///
221    /// # Design
222    ///
223    /// - Q: Can I run this function twice?
224    /// - A: Yes, it's idempotent, with the caveat that the history should
225    ///   be handled correctly.
226    ///
227    /// # Invariants
228    ///
229    /// All rule RHS' have at most `limit` symbols at all times until another
230    /// call to this method followed by a rule addition, or a call to
231    /// [`fn binarize_and_eliminate_nulling_rules`].
232    ///
233    /// [`fn binarize_and_eliminate_nulling_rules`]: Self::binarize_and_eliminate_nulling_rules
234    pub fn limit_rhs_len(&mut self, limit: Option<usize>) {
235        self.rhs_len_invariant = limit;
236        for rule in mem::take(&mut self.rules) {
237            self.add_rule(rule);
238        }
239    }
240
241    /// The grammar rewrites and stores rules with a certain range of RHS lengths.
242    /// This method returns this range.
243    pub fn rule_rhs_len_allowed_range(&self) -> ops::Range<usize> {
244        self.eliminate_nulling as usize..self.rhs_len_invariant.unwrap_or(usize::MAX)
245    }
246
247    /// Sorts the rule array.
248    pub fn sort(&mut self) {
249        self.rules.sort_by_key(|rule| (rule.lhs, rule.rhs.clone()));
250    }
251
252    /// Sorts the rule array in place, using the argument to compare elements.
253    pub fn sort_by(&mut self, compare: impl FnMut(&CfgRule, &CfgRule) -> cmp::Ordering) {
254        self.rules.sort_by(compare);
255    }
256
257    /// Removes consecutive duplicate rules.
258    pub fn dedup(&mut self) {
259        self.rules.dedup_by_key(|rule| (rule.lhs, rule.rhs.clone()));
260    }
261
262    /// Extend the list of rules with the rules in the given grammar.
263    /// The given grammar must have a compatible set of symbols.
264    pub fn extend(&mut self, other: &Cfg) {
265        self.rules.extend(other.rules.iter().cloned());
266    }
267
268    /// Ensures the grammar is binarized and eliminates all nulling rules, which have the
269    /// form `A ::= ()`. Returns a nulling subgrammar containing all the eliminated parts
270    /// of the grammar.
271    ///
272    /// In other words, this method splits off the nulling parts of the grammar.
273    ///
274    /// The language represented by the grammar is preserved, except for the lack of
275    /// the empty string if there was one. Unproductive rules may be removed.
276    ///
277    /// # Design
278    ///
279    /// - Q: Why two operations in one function?
280    /// - A: We implemented nulling rule elimination for binarized grammars
281    ///   only, because it's much easier to do so. There are algorthms for
282    ///   such elimination for rules with arbitrary RHS length, and potentially
283    ///   they do not produce that many rules as a result, but we found no such
284    ///   need for our purposes. If you do, feel free to contribute.
285    /// - Q: Can I run binarization twice?
286    /// - A: Yes, it's idempotent, with the caveat that the history should
287    ///   be handled correctly. This means you can limit the RHS length at
288    ///   any point, and run this method later.
289    ///
290    /// # Invariants
291    ///
292    /// All rule RHS' have at least 1 symbol and at most 2 symbols at all times until
293    /// another call to this method or a call to [`fn limit_rhs_len`].
294    ///
295    /// [`fn limit_rhs_len`]: Self::limit_rhs_len
296    pub fn binarize_and_eliminate_nulling_rules(&mut self) -> Cfg {
297        self.limit_rhs_len(Some(2));
298
299        let mut result = Cfg::with_sym_source(self.sym_source.clone());
300
301        // Grab the set of nullable symbols. We will eliminate them
302        // on the RHS of every rule.
303        let mut nullable = self.nulling_symbols();
304        // If all symbols on the RHS are nullable, the LHS is also nullable,
305        // hence we use `rhs_closure_for_all`.
306        self.rhs_closure_for_all(&mut nullable);
307        if nullable.iter().count() == 0 {
308            // Nothing to do.
309            return result;
310        }
311
312        let mut rewritten_work = Cfg::new();
313        for rule in self.rules() {
314            // Here, all rules are binarized.
315            assert!(rule.rhs.len() <= 2);
316            let is_nullable = |sym: &Symbol| nullable[*sym];
317            // Get the range where the symbol(s) are non-nullable, or `None`
318            // if all symbols are non-nullable.
319            let maybe_which = match (
320                rule.rhs.get(0).map(is_nullable),
321                rule.rhs.get(1).map(is_nullable),
322            ) {
323                (Some(true), Some(true)) => Some(All(2)),
324                (Some(true), None) => Some(All(1)),
325                (None, None) => Some(All(0)),
326                (Some(true), Some(false)) => Some(Left),
327                (Some(false), Some(true)) => Some(Right),
328                (Some(false), None) | (Some(false), Some(false)) => None,
329                (None, Some(_)) => unreachable!(),
330            };
331            let which = if let Some(which) = maybe_which {
332                which
333            } else {
334                continue;
335            };
336            // Q: Why do `Into::into` and not `.into()`?
337            // A: It's less confusing than a trailing call after a struct.
338            let history: History = Into::into(HistoryNodeEliminateNulling {
339                prev: rule.history,
340                rhs0: rule.rhs.get(0).cloned(),
341                rhs1: rule.rhs.get(1).cloned(),
342                which,
343            });
344            match which {
345                All(num) => {
346                    // nulling
347                    if num == 2 {
348                        rewritten_work
349                            .rule(rule.lhs)
350                            .history(history)
351                            .rhs(&rule.rhs[0..1]);
352                        rewritten_work
353                            .rule(rule.lhs)
354                            .history(history)
355                            .rhs(&rule.rhs[1..2]);
356                    }
357                    result
358                        .rule(rule.lhs)
359                        .history(history)
360                        .rhs(&rule.rhs[which.as_range()]);
361                }
362                Left | Right => {
363                    // Q: Why `negate`?
364                    // A: We are not keeping the nullable `which`,
365                    //    we are keeping the other symbol.
366                    rewritten_work
367                        .rule(rule.lhs)
368                        .history(history)
369                        .rhs(&rule.rhs[which.negate().as_range()]);
370                }
371            }
372        }
373
374        self.extend(&rewritten_work);
375
376        self.rules.retain(|rule| !rule.rhs.is_empty());
377
378        let mut productive = SymbolBitSet::new();
379        // TODO check again if correct
380        // Begin with marking terminal symbols appearing on the RHS as making
381        // the LHS productive.
382        productive.terminal(&*self);
383        // Q: Why subtract symbols which are productive in the nulling grammar?
384        //    What does this even mean?
385        // A: This subtraction means every rule containing one or more nulling symbols
386        //    will be removed with the last operation below. (Keep in mind, nulling
387        //    does not mean nullable.)
388        productive.subtract_productive(&result);
389        // All symbols on the RHS must be productive for the LHS to be productive.
390        self.rhs_closure_for_all(&mut productive);
391        self.rules.retain(|rule| {
392            // Retain the rule only if it's productive. We have to, in order to remove rules
393            // that were made unproductive as a result of `A ::= epsilon` rule elimination.
394            // Otherwise, some of our nonterminal symbols might become terminal.
395            productive[rule.lhs]
396        });
397
398        result
399    }
400
401    /// Returns an iterator over the list of grammar rules.
402    pub fn rules<'a>(&'a self) -> impl Iterator<Item = &'a CfgRule>
403    where
404        Self: 'a,
405    {
406        self.rules.iter()
407    }
408
409    /// Returns an iterator over the info on the given dot position.
410    ///
411    /// # Example
412    ///
413    /// When the grammar contains rules:
414    ///
415    /// - `start ::= A B C`
416    /// - `A ::= D E`
417    ///
418    /// We can get the following info on the `col` 0:
419    ///
420    /// - `DotInfo { lhs: start, predot: None, postdot: Some(A), earley: ... }`
421    /// - `DotInfo { lhs: A, predot: None, postdot: Some(D), earley: ... }`
422    ///
423    /// Because we are getting info for dots:
424    ///
425    /// - `start ::= • A B C`
426    /// - `A ::= • D E`
427    ///
428    /// We can also get the following info on the col `2`:
429    ///
430    /// - `DotInfo { lhs: start, predot: Some(B), postdot: Some(C), earley: ... }`
431    /// - `DotInfo { lhs: A, predot: Some(E), postdot: None, earley: ... }`
432    ///
433    /// Because we are getting info for dots:
434    ///
435    /// - `start ::= A B • C`
436    /// - `A ::= D E •`
437    pub fn column(&self, col: usize) -> impl Iterator<Item = DotInfo> + '_ {
438        let mapper = move |rule: &CfgRule| DotInfo {
439            lhs: rule.lhs,
440            predot: rule.rhs.get(col.wrapping_sub(1)).copied(),
441            postdot: rule.rhs.get(col).copied(),
442            earley: Some(rule.history.dot(col)),
443        };
444        self.rules().map(mapper)
445    }
446
447    /// Allows access to the symbol source through a reference.
448    pub fn sym_source(&self) -> &SymbolSource {
449        &self.sym_source
450    }
451
452    /// Allows mutable access to the symbol source through a reference.
453    pub fn sym_source_mut(&mut self) -> &mut SymbolSource {
454        &mut self.sym_source
455    }
456
457    /// Retains only the rules specified by the predicate.
458    ///
459    /// In other words, removes all the rules for which `f(&rule)`
460    /// returns false.
461    pub fn retain(&mut self, f: impl FnMut(&CfgRule) -> bool) {
462        self.rules.retain(f);
463    }
464
465    /// Adds a rule to this grammar, binarizing or limiting its length
466    /// if [`fn limit_rhs_len`] was called. Returns the number of parts
467    /// the given rule was split into.
468    ///
469    /// [`fn limit_rhs_len`]: Self::limit_rhs_len
470    pub fn add_rule(&mut self, rule: CfgRule) -> u32 {
471        if self.rule_rhs_len_allowed_range().contains(&rule.rhs.len()) {
472            self.rules.push(rule);
473            return 1;
474        }
475
476        // Rewrite to a set of binarized rules.
477        // From `LHS ⸬= A B C … X Y Z` to:
478        // ____________________
479        // | LHS ⸬= S0  Z
480        // | S0  ⸬= S1  Y
481        // | S1  ⸬= S2  X
482        // | …
483        // | Sm  ⸬= Sn  C
484        // | Sn  ⸬= A   B
485        let mut rhs_rev = rule.rhs.to_vec();
486        rhs_rev.reverse();
487        let mut tail = Vec::new();
488        let mut parts: u32 = 0;
489        while !rhs_rev.is_empty() {
490            let tail_idx = rhs_rev
491                .len()
492                .saturating_sub(self.rule_rhs_len_allowed_range().end);
493            tail.extend(rhs_rev.drain(tail_idx..));
494            tail.reverse();
495            let lhs;
496            if rhs_rev.is_empty() {
497                lhs = rule.lhs;
498            } else {
499                lhs = self.next_sym(None);
500                rhs_rev.push(lhs);
501            }
502            let history;
503            if parts == 0 && rhs_rev.is_empty() || self.rule_rhs_len_allowed_range().end != 2 {
504                history = rule.history;
505            } else {
506                history = Into::into(HistoryNodeBinarize {
507                    prev: rule.history,
508                    height: parts,
509                    full_len: rule.rhs.len(),
510                    is_top: rhs_rev.is_empty(),
511                });
512            }
513            self.rules.push(CfgRule::new(lhs, &tail[..], history));
514            tail.clear();
515            parts += 1;
516        }
517
518        parts
519    }
520
521    /// Empties the grammar.
522    pub fn clear_rules(&mut self) {
523        self.rules.clear();
524    }
525
526    /// Reverses the grammar.
527    pub fn reverse(&mut self) {
528        for rule in &mut self.rules {
529            rule.rhs = rule.rhs.iter().copied().rev().collect();
530        }
531    }
532
533    /// Starts building a new rule.
534    pub fn rule(&mut self, lhs: Symbol) -> RuleBuilder<'_> {
535        RuleBuilder::new(self).rule(lhs)
536    }
537
538    /// Starts building a new precedenced rule.
539    pub fn precedenced_rule(&mut self, lhs: Symbol) -> PrecedencedRuleBuilder<'_> {
540        PrecedencedRuleBuilder::new(self, lhs)
541    }
542
543    /// If **any** symbols on the RHS have the property, the LHS has it too.
544    /// Updates the given symbol set according to the above, and does it
545    /// transitively.
546    pub fn rhs_closure_for_all(&self, property: &mut SymbolBitSet) {
547        self.rhs_closure(property, RhsPropertyMode::All)
548    }
549
550    /// If **all** symbols on the RHS have the property, the LHS has it too.
551    /// Updates the given symbol set according to the above, and does it
552    /// transitively.
553    pub fn rhs_closure_for_any(&self, property: &mut SymbolBitSet) {
554        self.rhs_closure(property, RhsPropertyMode::Any)
555    }
556
557    /// If **any** or **all** symbols on the RHS have the property, the LHS
558    /// has it too.
559    /// Updates the given symbol set according to the above, and does it
560    /// transitively.
561    pub fn rhs_closure(&self, property: &mut SymbolBitSet, property_mode: RhsPropertyMode) {
562        let mut tmp_stack = self.tmp_stack.borrow_mut();
563        tmp_stack.extend(property.iter());
564
565        let occurence_map = OccurenceMap::from_rules(self.rules());
566
567        while let Some(work_sym) = tmp_stack.pop() {
568            for &rule_id in occurence_map.get(work_sym).rhs() {
569                let rule = &self.rules[rule_id];
570                let mut rhs_iter = rule.rhs.iter();
571                let get_property = |&sym: &Symbol| property[sym];
572                let rhs_satifies_property = match property_mode {
573                    RhsPropertyMode::All => rhs_iter.all(get_property),
574                    RhsPropertyMode::Any => rhs_iter.any(get_property),
575                };
576                if !property[rule.lhs] && rhs_satifies_property {
577                    property.set(rule.lhs, true);
578                    tmp_stack.push(rule.lhs);
579                }
580            }
581        }
582
583        tmp_stack.clear();
584    }
585
586    /// Primarily for minimal distance computation.
587    ///
588    /// The `value` argument contains a list of weights, one per symbol. The elements `None`
589    /// will be filled with new weights.
590    pub fn rhs_closure_with_values(&mut self, value: &mut [Option<u32>]) {
591        let mut tmp_stack = self.tmp_stack.borrow_mut();
592        for (maybe_sym_value, sym) in value.iter().zip(SymbolSource::generate_fresh()) {
593            if maybe_sym_value.is_some() {
594                tmp_stack.push(sym);
595            }
596        }
597
598        let occurence_map = OccurenceMap::from_rules(self.rules());
599
600        while let Some(work_sym) = tmp_stack.pop() {
601            let rules = occurence_map
602                .get(work_sym)
603                .rhs()
604                .iter()
605                .map(|&rule_id| &self.rules[rule_id]);
606            for rule in rules {
607                let maybe_work_value = rule
608                    .rhs
609                    .iter()
610                    .try_fold(0, |acc, elem| value[elem.usize()].map(|val| acc + val));
611                if let Some(work_value) = maybe_work_value {
612                    if let Some(current_value) = value[rule.lhs.usize()] {
613                        if current_value <= work_value {
614                            continue;
615                        }
616                    }
617                    value[rule.lhs.usize()] = Some(work_value);
618                    tmp_stack.push(rule.lhs);
619                }
620            }
621        }
622
623        tmp_stack.clear();
624    }
625
626    /// Modifies this grammar to wrap all roots by adding rules of the form:
627    /// `wrapped_root ::= start_of_input ~ root ~ end_of_input`.
628    ///
629    /// The result can be accessed with [`fn wrapped_roots`] and replaced using
630    /// [`fn set_wrapped_roots`].
631    ///
632    /// [`fn wrapped_roots`]: Self::wrapped_roots
633    /// [`fn set_wrapped_roots`]: Self::set_wrapped_roots
634    pub fn wrap_input(&mut self) {
635        self.wrapped_roots.clear();
636        let roots_len = self.roots.len();
637        let roots = mem::replace(&mut self.roots, MaybeSmallVec::with_capacity(roots_len));
638        for inner_root in roots {
639            let [root, start_of_input, end_of_input] = self.sym_source.with_names([
640                Some("root"),
641                Some("start_of_input"),
642                Some("end_of_input"),
643            ]);
644            self.add_rule(CfgRule {
645                lhs: root,
646                rhs: [start_of_input, inner_root, end_of_input].into(),
647                history: RootHistoryNode::Rule { lhs: root }.into(),
648            });
649            self.wrapped_roots.push(WrappedRoot {
650                root,
651                start_of_input,
652                inner_root,
653                end_of_input,
654            });
655            self.roots.push(root);
656        }
657    }
658
659    /// Checks whether the grammar is empty.
660    pub fn is_empty(&self) -> bool {
661        if self.wrapped_roots.is_empty() {
662            self.rules.is_empty()
663        } else {
664            let mut roots = self.roots.clone();
665            roots.sort();
666            self.rules()
667                .all(|rule| roots.binary_search(&rule.lhs).is_ok())
668        }
669    }
670
671    /// Formats the grammar to a `String`. The output looks like this:
672    ///
673    /// ```ignore
674    /// start(1) ::= A(2) B(3) C(4);
675    /// A(2) ::= g0(5) B(3);
676    /// ```
677    ///
678    /// Or, in case of no names to display in the entire grammar (all symbols
679    /// are gensyms), only the numbers are displayed.
680    pub fn stringify_to_bnf(&self) -> String {
681        let mut result = String::new();
682        let no_names = self.sym_source.names().iter().all(|name| name.is_none());
683        for rule in self.rules() {
684            let stringify_sym = |sym: Symbol| {
685                if no_names {
686                    format!("{}", sym.usize())
687                } else {
688                    format!("{}({})", self.sym_source.name_of(sym), sym.usize())
689                }
690            };
691            let lhs = stringify_sym(rule.lhs);
692            let rhs = if rule.rhs.is_empty() {
693                "()".into()
694            } else {
695                rule.rhs
696                    .iter()
697                    .copied()
698                    .map(stringify_sym)
699                    .enumerate()
700                    .map(|(i, elem)| {
701                        if i == 0 {
702                            elem.to_string()
703                        } else {
704                            format!(" ~ {}", elem)
705                        }
706                    })
707                    .collect::<String>()
708            };
709            writeln!(&mut result, "{} ::= {};", lhs, rhs).expect("writing to String failed");
710        }
711        result
712    }
713}
714
715impl CfgRule {
716    /// Creates a new rule.
717    pub fn new(lhs: Symbol, rhs: impl AsRef<[Symbol]>, history: History) -> Self {
718        CfgRule {
719            lhs,
720            rhs: rhs.as_ref().into(),
721            history,
722        }
723    }
724
725    /// Creates a named grammar rule with the symbols having names
726    /// grabbed from the given symbol source.
727    ///
728    /// # Panics
729    ///
730    /// Panics if the given list is empty.
731    pub fn named(&self, sym_source: &SymbolSource) -> NamedCfgRule {
732        NamedCfgRule {
733            lhs: self.lhs,
734            rhs: self.rhs.clone(),
735            history: Some(self.history),
736            names: sym_source.names(),
737        }
738    }
739}
740
741impl NamedCfgRule {
742    /// Creates a named grammar rule with the symbols having the given names.
743    /// LHS will have the first name in the list. The RHS is created with
744    /// the remaining names, one RHS symbol per name.
745    ///
746    /// # Panics
747    ///
748    /// Panics if the given list is empty.
749    pub fn new(names: Vec<Option<SymbolName>>) -> Self {
750        let mut iter = SymbolSource::generate_fresh();
751        NamedCfgRule {
752            lhs: iter.next().unwrap(),
753            rhs: iter.take(names.len() - 1).collect::<Vec<_>>().into(),
754            history: None,
755            names,
756        }
757    }
758
759    /// Creates a named grammar rule with the symbols having the given names,
760    /// and the given history.
761    ///
762    /// # Panics
763    ///
764    /// Panics if the given list is empty.
765    pub fn with_history(names: Vec<Option<SymbolName>>, history: History) -> Self {
766        let mut iter = SymbolSource::generate_fresh();
767        NamedCfgRule {
768            lhs: iter.next().unwrap(),
769            rhs: iter.take(names.len() - 1).collect::<Vec<_>>().into(),
770            history: Some(history),
771            names,
772        }
773    }
774}
775
776/// Creates a new Cfg rule, which holds names for debugging purposes.
777#[macro_export]
778macro_rules! named_cfg_rule {
779    ($lhs:ident ::= $($rhs:ident)*) => {
780        {
781            use std::rc::Rc;
782            NamedCfgRule::new(vec![Some(stringify!($lhs).into()), $(Some(stringify!($rhs).into())),*])
783        }
784    };
785}
786
787impl Eq for NamedCfgRule {
788    fn assert_receiver_is_total_eq(&self) {}
789}
790
791impl PartialEq for NamedCfgRule {
792    fn eq(&self, other: &Self) -> bool {
793        self.names[self.lhs.usize()] == other.names[other.lhs.usize()]
794            && self.rhs.len() == other.rhs.len()
795            && self
796                .rhs
797                .iter()
798                .zip(other.rhs.iter())
799                .all(|(sym_a, sym_b)| self.names[sym_a.usize()] == other.names[sym_b.usize()])
800            && (self.history.is_none() || other.history.is_none() || self.history == other.history)
801    }
802}
803
804impl fmt::Debug for NamedCfgRule {
805    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
806        let gensym = &"gensym".to_string();
807        let lhs = self.names[self.lhs.usize()].as_deref().unwrap_or(gensym);
808        let rhs = self
809            .rhs
810            .iter()
811            .map(|sym| self.names[sym.usize()].as_deref().unwrap_or(gensym))
812            .collect::<Vec<_>>();
813        f.debug_struct("NamedCfgRule")
814            .field("lhs", &lhs)
815            .field("rhs", &rhs)
816            .field("history", &self.history)
817            .finish()
818    }
819}
cfg_grammar/cfg.rs

cfg_grammar/
cfg.rs