cfg_predict/
first.rs

1//! FIRST sets.
2
3use std::collections::{BTreeMap, BTreeSet};
4
5use cfg_grammar::symbol::set::SymbolBitSet;
6use cfg_grammar::RuleContainer;
7use cfg_symbol::Symbol;
8
9use super::{PerSymbolSets, PredictSets};
10
11/// Collector of FIRST sets.
12pub struct FirstSets<'a, G> {
13    pub(super) map: PerSymbolSets,
14    lookahead: Vec<Option<Symbol>>,
15    changed: bool,
16    terminal_set: SymbolBitSet,
17    grammar: &'a G,
18}
19
20impl<'a, G> FirstSets<'a, G>
21where
22    G: RuleContainer,
23{
24    /// Compute all FIRST sets of the grammar.
25    ///
26    /// We define a binary relation FIRST(N, S), in which N is related to S
27    /// if the grammar has a production of the form `N ⸬= α S β`, where
28    /// α is a nullable string of symbols.
29    ///
30    /// We compute the transitive closure of this relation.
31    pub fn new(grammar: &'a G) -> Self {
32        let mut this = FirstSets {
33            map: BTreeMap::new(),
34            lookahead: vec![],
35            changed: true,
36            terminal_set: SymbolBitSet::terminal_set(grammar),
37            grammar,
38        };
39
40        this.collect();
41        this
42    }
43
44    /// Calculates a FIRST set for a string of symbols.
45    pub fn first_set_for_string(&self, string: &[Symbol]) -> BTreeSet<Option<Symbol>> {
46        let mut result = BTreeSet::new();
47        for &sym in string {
48            let result_cardinality = result.len();
49            if self.terminal_set.has_sym(sym) {
50                result.insert(Some(sym));
51            } else {
52                let first_set = self.map.get(&sym).unwrap();
53                for &maybe_terminal in first_set {
54                    if maybe_terminal.is_some() {
55                        result.insert(maybe_terminal);
56                    }
57                }
58            }
59            if result_cardinality != result.len() {
60                break;
61            }
62        }
63        if result.is_empty() {
64            result.insert(None);
65        }
66        result
67    }
68
69    fn collect(&mut self) {
70        while self.changed {
71            self.changed = false;
72            for rule in self.grammar.rules() {
73                let set_changed = self.rule(rule.lhs, rule.rhs);
74                self.changed |= set_changed;
75            }
76        }
77    }
78
79    fn rule(&mut self, lhs: Symbol, rhs: &[Symbol]) -> bool {
80        self.first_set_collect(rhs);
81        let first_set = self.map.entry(lhs).or_insert_with(BTreeSet::new);
82        let prev_cardinality = first_set.len();
83        first_set.extend(self.lookahead.iter().cloned());
84        self.lookahead.clear();
85        prev_cardinality != first_set.len()
86    }
87
88    /// Compute a FIRST set.
89    fn first_set_collect(&mut self, rhs: &[Symbol]) {
90        for &sym in rhs {
91            let mut nullable = false;
92            if self.terminal_set.has_sym(sym) {
93                self.lookahead.push(Some(sym));
94            } else {
95                match self.map.get(&sym) {
96                    None => {
97                        // This should only happen during set construction; it
98                        // corresponds to an entry that has not yet been
99                        // built. Otherwise, it would mean a nonterminal with
100                        // no productions. Either way, the resulting first set
101                        // should be empty.
102                    }
103                    Some(set) => {
104                        for &maybe_terminal in set {
105                            if maybe_terminal.is_some() {
106                                self.lookahead.push(maybe_terminal);
107                            } else {
108                                nullable = true;
109                            }
110                        }
111                    }
112                }
113            }
114            if !nullable {
115                // Successfully found a FIRST symbol.
116                return;
117            }
118        }
119        self.lookahead.push(None);
120    }
121}
122
123impl<'a, G> PredictSets for FirstSets<'a, G> {
124    /// Returns a reference to FIRST sets.
125    fn predict_sets(&self) -> &PerSymbolSets {
126        &self.map
127    }
128}