cfg_grammar/cfg.rs
1//! Definitions of the context-free grammar type and its rules.
2
3use std::borrow::Cow;
4use std::cell::RefCell;
5use std::fmt::Write;
6use std::rc::Rc;
7use std::{cmp, fmt};
8use std::{mem, ops};
9
10use cfg_history::earley::History;
11use cfg_symbol::SymbolName;
12
13use occurence_map::OccurenceMap;
14use rule_builder::RuleBuilder;
15
16use crate::local_prelude::*;
17use cfg_history::{
18 BinarizedRhsRange::*, HistoryNodeBinarize, HistoryNodeEliminateNulling, RootHistoryNode,
19};
20
21/// Context-free grammar type.
22///
23/// A context-free grammar can be though of as a regular expression
24/// equipped with recursion.
25#[derive(Clone, Debug)]
26pub struct Cfg {
27 /// The symbol source.
28 sym_source: SymbolSource,
29 /// The list of lexemes.
30 lexemes: SymbolBitSet,
31 /// The array of rules.
32 rules: Vec<CfgRule>,
33 /// Start symbols.
34 roots: MaybeSmallVec<Symbol>,
35 wrapped_roots: MaybeSmallVec<WrappedRoot>,
36 rhs_len_invariant: Option<usize>,
37 eliminate_nulling: bool,
38 tmp_stack: RefCell<Vec<Symbol>>,
39}
40
41/// Standard grammar rule representation.
42#[derive(Clone, Debug, Eq, PartialEq)]
43pub struct CfgRule {
44 /// The rule's left-hand side symbol.
45 pub lhs: Symbol,
46 /// The rule's right-hand side symbols.
47 pub rhs: Rc<[Symbol]>,
48 /// The rule's history.
49 pub history: History,
50}
51
52/// Standard grammar rule representation, including a name.
53/// Used for debugging in tests.
54#[derive(Clone)]
55pub struct NamedCfgRule {
56 /// The rule's left-hand side symbol.
57 pub lhs: Symbol,
58 /// The rule's right-hand side symbols.
59 pub rhs: Rc<[Symbol]>,
60 /// The rule's history.
61 ///
62 /// Carries information about grammar transformations
63 /// this rule went through.
64 pub history: Option<History>,
65 /// Collection of symbol names.
66 pub names: Vec<Option<SymbolName>>,
67}
68
69/// We have a method for adding "wrapped" roots in the form:
70/// `root ::= start_of_input ~ inner_root ~ end_of_input`.
71/// See [`fn wrap_input`].
72///
73/// [`fn wrap_input`]: Cfg::wrap_input
74#[derive(Clone, Copy, Debug)]
75pub struct WrappedRoot {
76 /// First symbol in the wrapping rule.
77 pub start_of_input: Symbol,
78 /// Second symbol in the wrapping rule.
79 pub inner_root: Symbol,
80 /// Third symbol in the wrapping rule.
81 pub end_of_input: Symbol,
82 /// The LHS of the wrapping rule.
83 pub root: Symbol,
84}
85
86/// Used only for [`fn rhs_closure`].
87///
88///
89///
90/// [`fn rhs_closure`]: Cfg::rhs_closure
91#[derive(Eq, PartialEq, Clone, Copy)]
92pub enum RhsPropertyMode {
93 /// If **all** symbols on the RHS have the property,
94 /// the LHS has it too.
95 All,
96 /// If **any** symbol on the RHS has the property,
97 /// the LHS has it too.
98 Any,
99}
100
101/// Exists only for [`fn column`].
102///
103/// [`fn column`]: Cfg::column
104#[derive(Clone, Copy, Debug)]
105pub struct DotInfo {
106 /// The LHS symbol for the grammar rule at the given row.
107 pub lhs: Symbol,
108 /// The pre-dot symbol, or `None` if our column 2 has a row with
109 /// only one symbol, meaning there is no pre-dot symbol.
110 pub predot: Option<Symbol>,
111 /// The post-dot symbol, or `None` if our column 1 has a row with
112 /// only one symbol, or we are at column 2, meaning there is no
113 /// post-dot symbol.
114 pub postdot: Option<Symbol>,
115 /// Semantics of the rule dot at the given column of rule dots.
116 pub earley: Option<earley::rule_dot::RuleDot>,
117}
118
119impl Default for Cfg {
120 fn default() -> Self {
121 Self::with_sym_source(SymbolSource::new())
122 }
123}
124
125impl Cfg {
126 /// Creates an empty context-free grammar.
127 #[inline]
128 pub fn new() -> Self {
129 Self::default()
130 }
131
132 /// Creates an empty context-free grammar with the given symbol source.
133 ///
134 /// Symbols will be generated with this symbol source.
135 pub fn with_sym_source(sym_source: SymbolSource) -> Self {
136 Cfg {
137 sym_source,
138 lexemes: SymbolBitSet::new(),
139 rules: vec![],
140 roots: MaybeSmallVec::new(),
141 wrapped_roots: MaybeSmallVec::new(),
142 rhs_len_invariant: None,
143 eliminate_nulling: false,
144 tmp_stack: RefCell::new(vec![]),
145 }
146 }
147
148 /// Returns generated symbols.
149 pub fn sym<const N: usize>(&mut self) -> [Symbol; N] {
150 self.sym_source_mut().sym()
151 }
152
153 /// Generates a new unique symbol.
154 ///
155 /// If a name is given, it will be recorded within the symbol
156 /// source.
157 pub fn next_sym(&mut self, name: Option<Cow<str>>) -> Symbol {
158 self.sym_source_mut().next_sym(name)
159 }
160
161 /// Generates a new unique symbol.
162 pub fn lexeme(&mut self, name: Option<Cow<str>>) -> Symbol {
163 self.lexemes.reserve(self.num_syms() + 1);
164 let result = self.sym_source_mut().next_sym(name);
165 self.lexemes.set(result, true);
166 result
167 }
168
169 // /// Generates
170 // pub fn sym_at<const N: usize>(at: usize) -> [Symbol; N] {
171 // let mut sym_source = SymbolSource::new();
172 // for _ in 0..at {
173 // sym_source.next_sym(None);
174 // }
175 // sym_source.sym()
176 // }
177
178 /// Returns the number of symbols in use.
179 pub fn num_syms(&self) -> usize {
180 self.sym_source().num_syms()
181 }
182
183 /// Assings a new set of roots.
184 ///
185 /// A root may be also called a start symbol.
186 ///
187 /// Roots may be remapped with `Remap::remap_symbols`.
188 ///
189 /// This library needs to know the roots for operations
190 /// such as FOLLOW set calculation,
191 pub fn set_roots(&mut self, roots: impl AsRef<[Symbol]>) {
192 self.roots = roots.as_ref().iter().copied().collect();
193 }
194
195 /// Returns the list of our previously assigned roots,
196 /// or empty if there were none assigned.
197 pub fn roots(&self) -> &[Symbol] {
198 &self.roots[..]
199 }
200
201 /// Returns the list of wrapped roots,
202 pub fn wrapped_roots(&self) -> &[WrappedRoot] {
203 &self.wrapped_roots[..]
204 }
205
206 /// Assigns a new set of wrapped roots.
207 ///
208 /// A wrapped roots may derive `start_of_input`, `root` and `end_of_input`.
209 pub fn set_wrapped_roots(&mut self, wrapped_roots: &[WrappedRoot]) {
210 self.wrapped_roots = wrapped_roots.into();
211 }
212
213 /// Determines whether there are assigned roots (whether there is a start
214 /// symbol).
215 pub fn has_roots(&self) -> bool {
216 !self.roots.is_empty()
217 }
218
219 /// Modifies this grammar to its weak equivalent.
220 ///
221 /// # Design
222 ///
223 /// - Q: Can I run this function twice?
224 /// - A: Yes, it's idempotent, with the caveat that the history should
225 /// be handled correctly.
226 ///
227 /// # Invariants
228 ///
229 /// All rule RHS' have at most `limit` symbols at all times until another
230 /// call to this method followed by a rule addition, or a call to
231 /// [`fn binarize_and_eliminate_nulling_rules`].
232 ///
233 /// [`fn binarize_and_eliminate_nulling_rules`]: Self::binarize_and_eliminate_nulling_rules
234 pub fn limit_rhs_len(&mut self, limit: Option<usize>) {
235 self.rhs_len_invariant = limit;
236 for rule in mem::take(&mut self.rules) {
237 self.add_rule(rule);
238 }
239 }
240
241 /// The grammar rewrites and stores rules with a certain range of RHS lengths.
242 /// This method returns this range.
243 pub fn rule_rhs_len_allowed_range(&self) -> ops::Range<usize> {
244 self.eliminate_nulling as usize..self.rhs_len_invariant.unwrap_or(usize::MAX)
245 }
246
247 /// Sorts the rule array.
248 pub fn sort(&mut self) {
249 self.rules.sort_by_key(|rule| (rule.lhs, rule.rhs.clone()));
250 }
251
252 /// Sorts the rule array in place, using the argument to compare elements.
253 pub fn sort_by(&mut self, compare: impl FnMut(&CfgRule, &CfgRule) -> cmp::Ordering) {
254 self.rules.sort_by(compare);
255 }
256
257 /// Removes consecutive duplicate rules.
258 pub fn dedup(&mut self) {
259 self.rules.dedup_by_key(|rule| (rule.lhs, rule.rhs.clone()));
260 }
261
262 /// Extend the list of rules with the rules in the given grammar.
263 /// The given grammar must have a compatible set of symbols.
264 pub fn extend(&mut self, other: &Cfg) {
265 self.rules.extend(other.rules.iter().cloned());
266 }
267
268 /// Ensures the grammar is binarized and eliminates all nulling rules, which have the
269 /// form `A ::= ()`. Returns a nulling subgrammar containing all the eliminated parts
270 /// of the grammar.
271 ///
272 /// In other words, this method splits off the nulling parts of the grammar.
273 ///
274 /// The language represented by the grammar is preserved, except for the lack of
275 /// the empty string if there was one. Unproductive rules may be removed.
276 ///
277 /// # Design
278 ///
279 /// - Q: Why two operations in one function?
280 /// - A: We implemented nulling rule elimination for binarized grammars
281 /// only, because it's much easier to do so. There are algorthms for
282 /// such elimination for rules with arbitrary RHS length, and potentially
283 /// they do not produce that many rules as a result, but we found no such
284 /// need for our purposes. If you do, feel free to contribute.
285 /// - Q: Can I run binarization twice?
286 /// - A: Yes, it's idempotent, with the caveat that the history should
287 /// be handled correctly. This means you can limit the RHS length at
288 /// any point, and run this method later.
289 ///
290 /// # Invariants
291 ///
292 /// All rule RHS' have at least 1 symbol and at most 2 symbols at all times until
293 /// another call to this method or a call to [`fn limit_rhs_len`].
294 ///
295 /// [`fn limit_rhs_len`]: Self::limit_rhs_len
296 pub fn binarize_and_eliminate_nulling_rules(&mut self) -> Cfg {
297 self.limit_rhs_len(Some(2));
298
299 let mut result = Cfg::with_sym_source(self.sym_source.clone());
300
301 // Grab the set of nullable symbols. We will eliminate them
302 // on the RHS of every rule.
303 let mut nullable = self.nulling_symbols();
304 // If all symbols on the RHS are nullable, the LHS is also nullable,
305 // hence we use `rhs_closure_for_all`.
306 self.rhs_closure_for_all(&mut nullable);
307 if nullable.iter().count() == 0 {
308 // Nothing to do.
309 return result;
310 }
311
312 let mut rewritten_work = Cfg::new();
313 for rule in self.rules() {
314 // Here, all rules are binarized.
315 assert!(rule.rhs.len() <= 2);
316 let is_nullable = |sym: &Symbol| nullable[*sym];
317 // Get the range where the symbol(s) are non-nullable, or `None`
318 // if all symbols are non-nullable.
319 let maybe_which = match (
320 rule.rhs.get(0).map(is_nullable),
321 rule.rhs.get(1).map(is_nullable),
322 ) {
323 (Some(true), Some(true)) => Some(All(2)),
324 (Some(true), None) => Some(All(1)),
325 (None, None) => Some(All(0)),
326 (Some(true), Some(false)) => Some(Left),
327 (Some(false), Some(true)) => Some(Right),
328 (Some(false), None) | (Some(false), Some(false)) => None,
329 (None, Some(_)) => unreachable!(),
330 };
331 let which = if let Some(which) = maybe_which {
332 which
333 } else {
334 continue;
335 };
336 // Q: Why do `Into::into` and not `.into()`?
337 // A: It's less confusing than a trailing call after a struct.
338 let history: History = Into::into(HistoryNodeEliminateNulling {
339 prev: rule.history,
340 rhs0: rule.rhs.get(0).cloned(),
341 rhs1: rule.rhs.get(1).cloned(),
342 which,
343 });
344 match which {
345 All(num) => {
346 // nulling
347 if num == 2 {
348 rewritten_work
349 .rule(rule.lhs)
350 .history(history)
351 .rhs(&rule.rhs[0..1]);
352 rewritten_work
353 .rule(rule.lhs)
354 .history(history)
355 .rhs(&rule.rhs[1..2]);
356 }
357 result
358 .rule(rule.lhs)
359 .history(history)
360 .rhs(&rule.rhs[which.as_range()]);
361 }
362 Left | Right => {
363 // Q: Why `negate`?
364 // A: We are not keeping the nullable `which`,
365 // we are keeping the other symbol.
366 rewritten_work
367 .rule(rule.lhs)
368 .history(history)
369 .rhs(&rule.rhs[which.negate().as_range()]);
370 }
371 }
372 }
373
374 self.extend(&rewritten_work);
375
376 self.rules.retain(|rule| !rule.rhs.is_empty());
377
378 let mut productive = SymbolBitSet::new();
379 // TODO check again if correct
380 // Begin with marking terminal symbols appearing on the RHS as making
381 // the LHS productive.
382 productive.terminal(&*self);
383 // Q: Why subtract symbols which are productive in the nulling grammar?
384 // What does this even mean?
385 // A: This subtraction means every rule containing one or more nulling symbols
386 // will be removed with the last operation below. (Keep in mind, nulling
387 // does not mean nullable.)
388 productive.subtract_productive(&result);
389 // All symbols on the RHS must be productive for the LHS to be productive.
390 self.rhs_closure_for_all(&mut productive);
391 self.rules.retain(|rule| {
392 // Retain the rule only if it's productive. We have to, in order to remove rules
393 // that were made unproductive as a result of `A ::= epsilon` rule elimination.
394 // Otherwise, some of our nonterminal symbols might become terminal.
395 productive[rule.lhs]
396 });
397
398 result
399 }
400
401 /// Returns an iterator over the list of grammar rules.
402 pub fn rules<'a>(&'a self) -> impl Iterator<Item = &'a CfgRule>
403 where
404 Self: 'a,
405 {
406 self.rules.iter()
407 }
408
409 /// Returns an iterator over the info on the given dot position.
410 ///
411 /// # Example
412 ///
413 /// When the grammar contains rules:
414 ///
415 /// - `start ::= A B C`
416 /// - `A ::= D E`
417 ///
418 /// We can get the following info on the `col` 0:
419 ///
420 /// - `DotInfo { lhs: start, predot: None, postdot: Some(A), earley: ... }`
421 /// - `DotInfo { lhs: A, predot: None, postdot: Some(D), earley: ... }`
422 ///
423 /// Because we are getting info for dots:
424 ///
425 /// - `start ::= • A B C`
426 /// - `A ::= • D E`
427 ///
428 /// We can also get the following info on the col `2`:
429 ///
430 /// - `DotInfo { lhs: start, predot: Some(B), postdot: Some(C), earley: ... }`
431 /// - `DotInfo { lhs: A, predot: Some(E), postdot: None, earley: ... }`
432 ///
433 /// Because we are getting info for dots:
434 ///
435 /// - `start ::= A B • C`
436 /// - `A ::= D E •`
437 pub fn column(&self, col: usize) -> impl Iterator<Item = DotInfo> + '_ {
438 let mapper = move |rule: &CfgRule| DotInfo {
439 lhs: rule.lhs,
440 predot: rule.rhs.get(col.wrapping_sub(1)).copied(),
441 postdot: rule.rhs.get(col).copied(),
442 earley: Some(rule.history.dot(col)),
443 };
444 self.rules().map(mapper)
445 }
446
447 /// Allows access to the symbol source through a reference.
448 pub fn sym_source(&self) -> &SymbolSource {
449 &self.sym_source
450 }
451
452 /// Allows mutable access to the symbol source through a reference.
453 pub fn sym_source_mut(&mut self) -> &mut SymbolSource {
454 &mut self.sym_source
455 }
456
457 /// Retains only the rules specified by the predicate.
458 ///
459 /// In other words, removes all the rules for which `f(&rule)`
460 /// returns false.
461 pub fn retain(&mut self, f: impl FnMut(&CfgRule) -> bool) {
462 self.rules.retain(f);
463 }
464
465 /// Adds a rule to this grammar, binarizing or limiting its length
466 /// if [`fn limit_rhs_len`] was called. Returns the number of parts
467 /// the given rule was split into.
468 ///
469 /// [`fn limit_rhs_len`]: Self::limit_rhs_len
470 pub fn add_rule(&mut self, rule: CfgRule) -> u32 {
471 if self.rule_rhs_len_allowed_range().contains(&rule.rhs.len()) {
472 self.rules.push(rule);
473 return 1;
474 }
475
476 // Rewrite to a set of binarized rules.
477 // From `LHS ⸬= A B C … X Y Z` to:
478 // ____________________
479 // | LHS ⸬= S0 Z
480 // | S0 ⸬= S1 Y
481 // | S1 ⸬= S2 X
482 // | …
483 // | Sm ⸬= Sn C
484 // | Sn ⸬= A B
485 let mut rhs_rev = rule.rhs.to_vec();
486 rhs_rev.reverse();
487 let mut tail = Vec::new();
488 let mut parts: u32 = 0;
489 while !rhs_rev.is_empty() {
490 let tail_idx = rhs_rev
491 .len()
492 .saturating_sub(self.rule_rhs_len_allowed_range().end);
493 tail.extend(rhs_rev.drain(tail_idx..));
494 tail.reverse();
495 let lhs;
496 if rhs_rev.is_empty() {
497 lhs = rule.lhs;
498 } else {
499 lhs = self.next_sym(None);
500 rhs_rev.push(lhs);
501 }
502 let history;
503 if parts == 0 && rhs_rev.is_empty() || self.rule_rhs_len_allowed_range().end != 2 {
504 history = rule.history;
505 } else {
506 history = Into::into(HistoryNodeBinarize {
507 prev: rule.history,
508 height: parts,
509 full_len: rule.rhs.len(),
510 is_top: rhs_rev.is_empty(),
511 });
512 }
513 self.rules.push(CfgRule::new(lhs, &tail[..], history));
514 tail.clear();
515 parts += 1;
516 }
517
518 parts
519 }
520
521 /// Empties the grammar.
522 pub fn clear_rules(&mut self) {
523 self.rules.clear();
524 }
525
526 /// Reverses the grammar.
527 pub fn reverse(&mut self) {
528 for rule in &mut self.rules {
529 rule.rhs = rule.rhs.iter().copied().rev().collect();
530 }
531 }
532
533 /// Starts building a new rule.
534 pub fn rule(&mut self, lhs: Symbol) -> RuleBuilder<'_> {
535 RuleBuilder::new(self).rule(lhs)
536 }
537
538 /// Starts building a new precedenced rule.
539 pub fn precedenced_rule(&mut self, lhs: Symbol) -> PrecedencedRuleBuilder<'_> {
540 PrecedencedRuleBuilder::new(self, lhs)
541 }
542
543 /// If **any** symbols on the RHS have the property, the LHS has it too.
544 /// Updates the given symbol set according to the above, and does it
545 /// transitively.
546 pub fn rhs_closure_for_all(&self, property: &mut SymbolBitSet) {
547 self.rhs_closure(property, RhsPropertyMode::All)
548 }
549
550 /// If **all** symbols on the RHS have the property, the LHS has it too.
551 /// Updates the given symbol set according to the above, and does it
552 /// transitively.
553 pub fn rhs_closure_for_any(&self, property: &mut SymbolBitSet) {
554 self.rhs_closure(property, RhsPropertyMode::Any)
555 }
556
557 /// If **any** or **all** symbols on the RHS have the property, the LHS
558 /// has it too.
559 /// Updates the given symbol set according to the above, and does it
560 /// transitively.
561 pub fn rhs_closure(&self, property: &mut SymbolBitSet, property_mode: RhsPropertyMode) {
562 let mut tmp_stack = self.tmp_stack.borrow_mut();
563 tmp_stack.extend(property.iter());
564
565 let occurence_map = OccurenceMap::from_rules(self.rules());
566
567 while let Some(work_sym) = tmp_stack.pop() {
568 for &rule_id in occurence_map.get(work_sym).rhs() {
569 let rule = &self.rules[rule_id];
570 let mut rhs_iter = rule.rhs.iter();
571 let get_property = |&sym: &Symbol| property[sym];
572 let rhs_satifies_property = match property_mode {
573 RhsPropertyMode::All => rhs_iter.all(get_property),
574 RhsPropertyMode::Any => rhs_iter.any(get_property),
575 };
576 if !property[rule.lhs] && rhs_satifies_property {
577 property.set(rule.lhs, true);
578 tmp_stack.push(rule.lhs);
579 }
580 }
581 }
582
583 tmp_stack.clear();
584 }
585
586 /// Primarily for minimal distance computation.
587 ///
588 /// The `value` argument contains a list of weights, one per symbol. The elements `None`
589 /// will be filled with new weights.
590 pub fn rhs_closure_with_values(&mut self, value: &mut [Option<u32>]) {
591 let mut tmp_stack = self.tmp_stack.borrow_mut();
592 for (maybe_sym_value, sym) in value.iter().zip(SymbolSource::generate_fresh()) {
593 if maybe_sym_value.is_some() {
594 tmp_stack.push(sym);
595 }
596 }
597
598 let occurence_map = OccurenceMap::from_rules(self.rules());
599
600 while let Some(work_sym) = tmp_stack.pop() {
601 let rules = occurence_map
602 .get(work_sym)
603 .rhs()
604 .iter()
605 .map(|&rule_id| &self.rules[rule_id]);
606 for rule in rules {
607 let maybe_work_value = rule
608 .rhs
609 .iter()
610 .try_fold(0, |acc, elem| value[elem.usize()].map(|val| acc + val));
611 if let Some(work_value) = maybe_work_value {
612 if let Some(current_value) = value[rule.lhs.usize()] {
613 if current_value <= work_value {
614 continue;
615 }
616 }
617 value[rule.lhs.usize()] = Some(work_value);
618 tmp_stack.push(rule.lhs);
619 }
620 }
621 }
622
623 tmp_stack.clear();
624 }
625
626 /// Modifies this grammar to wrap all roots by adding rules of the form:
627 /// `wrapped_root ::= start_of_input ~ root ~ end_of_input`.
628 ///
629 /// The result can be accessed with [`fn wrapped_roots`] and replaced using
630 /// [`fn set_wrapped_roots`].
631 ///
632 /// [`fn wrapped_roots`]: Self::wrapped_roots
633 /// [`fn set_wrapped_roots`]: Self::set_wrapped_roots
634 pub fn wrap_input(&mut self) {
635 self.wrapped_roots.clear();
636 let roots_len = self.roots.len();
637 let roots = mem::replace(&mut self.roots, MaybeSmallVec::with_capacity(roots_len));
638 for inner_root in roots {
639 let [root, start_of_input, end_of_input] = self.sym_source.with_names([
640 Some("root"),
641 Some("start_of_input"),
642 Some("end_of_input"),
643 ]);
644 self.add_rule(CfgRule {
645 lhs: root,
646 rhs: [start_of_input, inner_root, end_of_input].into(),
647 history: RootHistoryNode::Rule { lhs: root }.into(),
648 });
649 self.wrapped_roots.push(WrappedRoot {
650 root,
651 start_of_input,
652 inner_root,
653 end_of_input,
654 });
655 self.roots.push(root);
656 }
657 }
658
659 /// Checks whether the grammar is empty.
660 pub fn is_empty(&self) -> bool {
661 if self.wrapped_roots.is_empty() {
662 self.rules.is_empty()
663 } else {
664 let mut roots = self.roots.clone();
665 roots.sort();
666 self.rules()
667 .all(|rule| roots.binary_search(&rule.lhs).is_ok())
668 }
669 }
670
671 /// Formats the grammar to a `String`. The output looks like this:
672 ///
673 /// ```ignore
674 /// start(1) ::= A(2) B(3) C(4);
675 /// A(2) ::= g0(5) B(3);
676 /// ```
677 ///
678 /// Or, in case of no names to display in the entire grammar (all symbols
679 /// are gensyms), only the numbers are displayed.
680 pub fn stringify_to_bnf(&self) -> String {
681 let mut result = String::new();
682 let no_names = self.sym_source.names().iter().all(|name| name.is_none());
683 for rule in self.rules() {
684 let stringify_sym = |sym: Symbol| {
685 if no_names {
686 format!("{}", sym.usize())
687 } else {
688 format!("{}({})", self.sym_source.name_of(sym), sym.usize())
689 }
690 };
691 let lhs = stringify_sym(rule.lhs);
692 let rhs = if rule.rhs.is_empty() {
693 "()".into()
694 } else {
695 rule.rhs
696 .iter()
697 .copied()
698 .map(stringify_sym)
699 .enumerate()
700 .map(|(i, elem)| {
701 if i == 0 {
702 elem.to_string()
703 } else {
704 format!(" ~ {}", elem)
705 }
706 })
707 .collect::<String>()
708 };
709 writeln!(&mut result, "{} ::= {};", lhs, rhs).expect("writing to String failed");
710 }
711 result
712 }
713}
714
715impl CfgRule {
716 /// Creates a new rule.
717 pub fn new(lhs: Symbol, rhs: impl AsRef<[Symbol]>, history: History) -> Self {
718 CfgRule {
719 lhs,
720 rhs: rhs.as_ref().into(),
721 history,
722 }
723 }
724
725 /// Creates a named grammar rule with the symbols having names
726 /// grabbed from the given symbol source.
727 ///
728 /// # Panics
729 ///
730 /// Panics if the given list is empty.
731 pub fn named(&self, sym_source: &SymbolSource) -> NamedCfgRule {
732 NamedCfgRule {
733 lhs: self.lhs,
734 rhs: self.rhs.clone(),
735 history: Some(self.history),
736 names: sym_source.names(),
737 }
738 }
739}
740
741impl NamedCfgRule {
742 /// Creates a named grammar rule with the symbols having the given names.
743 /// LHS will have the first name in the list. The RHS is created with
744 /// the remaining names, one RHS symbol per name.
745 ///
746 /// # Panics
747 ///
748 /// Panics if the given list is empty.
749 pub fn new(names: Vec<Option<SymbolName>>) -> Self {
750 let mut iter = SymbolSource::generate_fresh();
751 NamedCfgRule {
752 lhs: iter.next().unwrap(),
753 rhs: iter.take(names.len() - 1).collect::<Vec<_>>().into(),
754 history: None,
755 names,
756 }
757 }
758
759 /// Creates a named grammar rule with the symbols having the given names,
760 /// and the given history.
761 ///
762 /// # Panics
763 ///
764 /// Panics if the given list is empty.
765 pub fn with_history(names: Vec<Option<SymbolName>>, history: History) -> Self {
766 let mut iter = SymbolSource::generate_fresh();
767 NamedCfgRule {
768 lhs: iter.next().unwrap(),
769 rhs: iter.take(names.len() - 1).collect::<Vec<_>>().into(),
770 history: Some(history),
771 names,
772 }
773 }
774}
775
776/// Creates a new Cfg rule, which holds names for debugging purposes.
777#[macro_export]
778macro_rules! named_cfg_rule {
779 ($lhs:ident ::= $($rhs:ident)*) => {
780 {
781 use std::rc::Rc;
782 NamedCfgRule::new(vec![Some(stringify!($lhs).into()), $(Some(stringify!($rhs).into())),*])
783 }
784 };
785}
786
787impl Eq for NamedCfgRule {
788 fn assert_receiver_is_total_eq(&self) {}
789}
790
791impl PartialEq for NamedCfgRule {
792 fn eq(&self, other: &Self) -> bool {
793 self.names[self.lhs.usize()] == other.names[other.lhs.usize()]
794 && self.rhs.len() == other.rhs.len()
795 && self
796 .rhs
797 .iter()
798 .zip(other.rhs.iter())
799 .all(|(sym_a, sym_b)| self.names[sym_a.usize()] == other.names[sym_b.usize()])
800 && (self.history.is_none() || other.history.is_none() || self.history == other.history)
801 }
802}
803
804impl fmt::Debug for NamedCfgRule {
805 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
806 let gensym = &"gensym".to_string();
807 let lhs = self.names[self.lhs.usize()].as_deref().unwrap_or(gensym);
808 let rhs = self
809 .rhs
810 .iter()
811 .map(|sym| self.names[sym.usize()].as_deref().unwrap_or(gensym))
812 .collect::<Vec<_>>();
813 f.debug_struct("NamedCfgRule")
814 .field("lhs", &lhs)
815 .field("rhs", &rhs)
816 .field("history", &self.history)
817 .finish()
818 }
819}