cfg_grammar/
symbol_bit_set.rs

1//! Informs whether symbols are terminal or nonterminal.
2
3use std::{iter, ops};
4
5use bit_vec;
6use bit_vec::BitVec;
7
8use crate::local_prelude::*;
9
10/// A set of symbols in the form of a bit vector.
11///
12#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
13#[derive(miniserde::Serialize, miniserde::Deserialize, Clone, Debug)]
14pub struct SymbolBitSet {
15    bit_vec: BitVec,
16}
17
18impl Default for SymbolBitSet {
19    fn default() -> Self {
20        Self::new()
21    }
22}
23
24impl SymbolBitSet {
25    /// Constructs an empty `SymbolBitSet`.
26    pub fn new() -> Self {
27        SymbolBitSet {
28            bit_vec: BitVec::new(),
29        }
30    }
31
32    /// Constructs a `SymbolBitSet`.
33    pub fn from_elem(grammar: &Cfg, elem: bool) -> Self {
34        SymbolBitSet {
35            bit_vec: BitVec::from_elem(grammar.num_syms(), elem),
36        }
37    }
38
39    /// Resets the set to `false` repeated as many times as there are symbols
40    /// in the given source.
41    ///
42    /// In other words, none of the symbols are included, but we have space to mark them
43    /// as included.
44    pub fn reset(&mut self, symbol_source: &SymbolSource) {
45        self.bit_vec = BitVec::new();
46        self.bit_vec
47            .extend(iter::repeat(false).take(symbol_source.num_syms()));
48    }
49
50    /// Resets the set to `true` repeated as many times as there are symbols
51    /// in the given source.
52    ///
53    /// In other words, all of the symbols are included in the set.
54    pub fn set_all(&mut self, symbol_source: &SymbolSource) {
55        self.bit_vec = BitVec::new();
56        self.bit_vec
57            .extend(iter::repeat(true).take(symbol_source.num_syms()));
58    }
59
60    /// Makes this set include only symbols which appear anywhere in the grammar.
61    pub fn used(&mut self, grammar: &Cfg) {
62        self.reset(grammar.sym_source());
63        for rule in grammar.rules() {
64            self.set(rule.lhs, true);
65            for &sym in &rule.rhs[..] {
66                self.set(sym, true);
67            }
68        }
69    }
70
71    /// Makes this set include only symbols which do **not** appear
72    /// anywhere in the grammar.
73    pub fn unused(&mut self, grammar: &Cfg) {
74        self.set_all(grammar.sym_source());
75        for rule in grammar.rules() {
76            self.set(rule.lhs, false);
77            for &sym in &rule.rhs[..] {
78                self.set(sym, false);
79            }
80        }
81    }
82
83    /// Included symbols will be excluded, and symbols that do not
84    /// appear in the set will be included instead.
85    pub fn negate(&mut self) {
86        self.bit_vec.negate();
87    }
88
89    /// Gathers information about whether symbols are terminal or nonterminal.
90    /// Constructs a set of terminal symbols.
91    ///
92    /// Constructs a data structure in O(n) time.
93    pub fn terminal(&mut self, grammar: &Cfg) {
94        self.set_all(grammar.sym_source());
95        for rule in grammar.rules() {
96            self.set(rule.lhs, false);
97        }
98    }
99
100    /// Gathers information about whether symbols are nulling.
101    /// Includes the set of nulling symbols.
102    ///
103    /// Constructs a data structure in O(n) time.
104    pub fn nulling(&mut self, grammar: &Cfg) {
105        if self.space() == 0 {
106            self.reset(grammar.sym_source());
107        }
108        for rule in grammar.rules() {
109            if rule.rhs.is_empty() {
110                self.set(rule.lhs, true);
111            }
112        }
113    }
114
115    /// Gathers information about whether symbols are terminal or nonterminal.
116    /// Includes the set of nonterminal symbols.
117    ///
118    /// Constructs a data structure in O(n) time.
119    pub fn productive(&mut self, grammar: &Cfg) {
120        if self.space() == 0 {
121            self.reset(grammar.sym_source());
122        }
123        for rule in grammar.rules() {
124            self.set(rule.lhs, true);
125        }
126    }
127
128    /// Excludes all nonterminals from the given grammar
129    /// from the set.
130    pub fn subtract_productive(&mut self, grammar: &Cfg) {
131        if self.space() == 0 {
132            self.set_all(grammar.sym_source());
133        }
134        for rule in grammar.rules() {
135            self.set(rule.lhs, false);
136        }
137    }
138
139    /// Sets the membership for the given symbol.
140    pub fn set(&mut self, index: Symbol, elem: bool) {
141        self.bit_vec.set(index.usize(), elem);
142    }
143
144    /// Allows access to the underlying `BitVec`.
145    pub fn bit_vec(&self) -> &BitVec {
146        &self.bit_vec
147    }
148
149    /// Converts into a bit vector.
150    pub fn into_bit_vec(self) -> BitVec {
151        self.bit_vec
152    }
153
154    /// Iterates over symbols in the set.
155    pub fn iter(&self) -> impl Iterator<Item = Symbol> + use<'_> {
156        self.bit_vec
157            .iter()
158            .zip(SymbolSource::generate_fresh())
159            .filter_map(|(is_present, sym)| if is_present { Some(sym) } else { None })
160    }
161
162    /// Includes all symbols that are included in the other set.
163    pub fn union(&mut self, other: &SymbolBitSet) {
164        self.bit_vec.or(&other.bit_vec);
165    }
166
167    /// Returns the number of symbols we have space for.
168    pub fn space(&self) -> usize {
169        self.bit_vec.len()
170    }
171
172    /// Determines whether this set contains all symbols we have
173    /// space for.
174    pub fn all(&self) -> bool {
175        self.bit_vec.iter().all(|b| b)
176    }
177
178    /// Not to be confused with `fn is_empty`.
179    pub fn is_empty(&self) -> bool {
180        self.bit_vec.iter().any(|b| b)
181    }
182
183    /// Reserves space for additional symbols.
184    pub fn reserve(&mut self, len: usize) {
185        self.bit_vec
186            .extend(iter::repeat(false).take(len.saturating_sub(self.bit_vec.len())));
187    }
188}
189
190static TRUE: bool = true;
191static FALSE: bool = false;
192
193impl ops::Index<Symbol> for SymbolBitSet {
194    type Output = bool;
195
196    fn index(&self, index: Symbol) -> &Self::Output {
197        if self.bit_vec[index.usize()] {
198            &TRUE
199        } else {
200            &FALSE
201        }
202    }
203}
204
205impl Cfg {
206    /// Constructs a set of terminal symbols.
207    pub fn terminal_symbols(&self) -> SymbolBitSet {
208        let mut set = SymbolBitSet::new();
209        set.terminal(self);
210        set
211    }
212
213    /// Constructs a set of nulling symbols.
214    pub fn nulling_symbols(&self) -> SymbolBitSet {
215        let mut set = SymbolBitSet::new();
216        set.reset(self.sym_source());
217        set.nulling(self);
218        set
219    }
220
221    /// Constructs a set of unused symbols.
222    pub fn unused_symbols(&self) -> SymbolBitSet {
223        let mut set = SymbolBitSet::new();
224        set.reset(self.sym_source());
225        set.unused(self);
226        set
227    }
228}