1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
//! FIRST sets.

use std::collections::{BTreeMap, BTreeSet};

use grammar::{ContextFree, ContextFreeRef};
use prediction::PerSymbolSets;
use rule::GrammarRule;
use symbol::{Symbol, SymbolBitSet};

/// FIRST sets.
pub struct FirstSets {
    pub(super) map: PerSymbolSets,
}

/// Collector of FIRST sets.
pub struct FirstSetsCollector<'a, G> {
    pub(super) map: PerSymbolSets,
    lookahead: Vec<Option<Symbol>>,
    changed: bool,
    terminal_set: SymbolBitSet,
    grammar: &'a G,
}

impl FirstSets {
    /// Access the FIRST sets.
    pub fn first_sets(&self) -> &PerSymbolSets {
        &self.map
    }
}

impl<'a, G> FirstSetsCollector<'a, G>
    where G: ContextFree,
            &'a G: ContextFreeRef<'a, Target = G>,
{
    /// Compute all FIRST sets of the grammar.
    ///
    /// We define a binary relation FIRST(N, S), in which N is related to S
    /// if the grammar has a production of the form `N ⸬= α S β`, where
    /// α is a nullable string of symbols.
    ///
    /// We compute the transitive closure of this relation.
    pub fn new(grammar: &'a G) -> Self {
        let mut this = FirstSetsCollector {
            map: BTreeMap::new(),
            lookahead: vec![],
            changed: true,
            terminal_set: SymbolBitSet::terminal_set(&grammar),
            grammar,
        };

        this.collect();
        this
    }

    /// Calculates a FIRST set for a string of symbols.
    pub fn first_set_for_string(&self, string: &[Symbol]) -> BTreeSet<Option<Symbol>> {
        let mut result = BTreeSet::new();
        for &sym in string {
            let result_cardinality = result.len();
            if self.terminal_set.has_sym(sym) {
                result.insert(Some(sym));
            } else {
                let first_set = self.map.get(&sym).unwrap();
                for &maybe_terminal in first_set {
                    if maybe_terminal.is_some() {
                        result.insert(maybe_terminal);
                    }
                }
            }
            if result_cardinality != result.len() {
                break;
            }
        }
        if result.is_empty() {
            result.insert(None);
        }
        result
    }

    /// Returns a FIRST sets structure.
    pub fn first_sets(&self) -> &PerSymbolSets {
        &self.map
    }

    fn collect(&mut self) {
        while self.changed {
            self.changed = false;
            for rule in self.grammar.rules() {
                let set_changed = self.rule(rule.lhs(), rule.rhs());
                self.changed |= set_changed;
            }
        }
    }

    fn rule(&mut self, lhs: Symbol, rhs: &[Symbol]) -> bool {
        self.first_set_collect(rhs);
        let first_set = self.map.entry(lhs).or_insert_with(BTreeSet::new);
        let prev_cardinality = first_set.len();
        first_set.extend(self.lookahead.iter().cloned());
        self.lookahead.clear();
        prev_cardinality != first_set.len()
    }

    /// Compute a FIRST set.
    fn first_set_collect(&mut self, rhs: &[Symbol]) {
        for &sym in rhs {
            let mut nullable = false;
            if self.terminal_set.has_sym(sym) {
                self.lookahead.push(Some(sym));
            } else {
                match self.map.get(&sym) {
                    None => {
                        // This should only happen during set construction; it
                        // corresponds to an entry that has not yet been
                        // built. Otherwise, it would mean a nonterminal with
                        // no productions. Either way, the resulting first set
                        // should be empty.
                    }
                    Some(set) => {
                        for &maybe_terminal in set {
                            if maybe_terminal.is_some() {
                                self.lookahead.push(maybe_terminal);
                            } else {
                                nullable = true;
                            }
                        }
                    }
                }
            }
            if !nullable {
                // Successfully found a FIRST symbol.
                return;
            }
        }
        self.lookahead.push(None);
    }
}