grammar_utils/
grammar.rs

1// Offset into Grammar::symbols
2#[derive(Clone, Copy, Eq, PartialEq)]
3pub(crate) struct SymbolIndex(usize);
4
5// Offset into Grammar::rules
6#[derive(Clone, Copy, Eq, PartialEq)]
7pub(crate) struct RuleIndex(usize);
8
9impl From<SymbolIndex> for usize {
10    fn from(value: SymbolIndex) -> Self {
11        value.0
12    }
13}
14
15impl From<RuleIndex> for usize {
16    fn from(value: RuleIndex) -> Self {
17        value.0
18    }
19}
20
21pub struct GrammarBuilder {
22    symbols: Vec<SymbolData>,
23    rules: Vec<RuleData>,
24}
25
26/// `Grammar` represents a context-free grammar.
27///
28/// It tracks a set of symbols and a set of production rules showing how those symbols interact.
29pub struct Grammar {
30    symbols: Vec<SymbolData>,
31    rules: Vec<RuleData>,
32}
33
34// Symbols should have a name.
35// Ideally, these should be a C-style identifier.
36struct SymbolData {
37    name: String,
38}
39
40// A production rule takes a nonterminal symbol on the LHS
41// to a sequence of symbols (terminal and/or nonterminal) on the RHS
42struct RuleData {
43    lhs: SymbolIndex,
44    rhs: Vec<SymbolIndex>,
45}
46
47impl std::fmt::Debug for Grammar {
48    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
49        for rule in &self.rules() {
50            writeln!(f, "{rule:?}")?;
51        }
52        Ok(())
53    }
54}
55
56impl<'g> std::fmt::Debug for Symbol<'g> {
57    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
58        write!(f, "{}", self.name())
59    }
60}
61
62impl<'g> std::fmt::Debug for Rule<'g> {
63    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
64        let lhs = format!("{:?}", self.lhs());
65        let rhs = self.rhs().into_iter().map(|symbol| format!("{symbol:?}")).collect::<Vec<_>>().join(" ");
66        write!(f, "{lhs} -> {rhs}")
67    }
68}
69
70/// A `Symbol` is a handle to a symbol inside of a `Grammar`.
71#[derive(Clone, Copy)]
72pub struct Symbol<'g> {
73    grammar: &'g Grammar,
74    index: SymbolIndex,
75}
76
77/// A `Rule` is a handle to a rule inside of a `Grammar`.
78#[derive(Clone, Copy)]
79pub struct Rule<'g> {
80    grammar: &'g Grammar,
81    index: RuleIndex,
82}
83
84impl<'g> PartialEq for Symbol<'g> {
85    fn eq(&self, other: &Self) -> bool {
86        std::ptr::eq(self.grammar, other.grammar) && self.index == other.index
87    }
88}
89
90impl<'g> Eq for Symbol<'g> {}
91
92impl<'g> PartialEq for Rule<'g> {
93    fn eq(&self, other: &Self) -> bool {
94        std::ptr::eq(self.grammar, other.grammar) && self.index == other.index
95    }
96}
97
98impl<'g> Eq for Rule<'g> {}
99
100impl<'g> std::hash::Hash for Symbol<'g> {
101    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
102        self.index.0.hash(state)
103    }
104}
105
106impl<'g> std::hash::Hash for Rule<'g> {
107    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
108        self.index.0.hash(state)
109    }
110}
111
112impl<'g> Symbol<'g> {
113    /// The `Grammar` backing this symbol.
114    pub fn grammar(&self) -> &'g Grammar {
115        self.grammar
116    }
117
118    fn data(&self) -> &SymbolData {
119        &self.grammar.symbols[self.index.0]
120    }
121
122    /// The name of the symbol.
123    pub fn name(&self) -> String {
124        self.data().name.clone()
125    }
126
127    /// Is the symbol a terminal?
128    pub fn is_terminal(&self) -> bool {
129        !self.is_nonterminal()
130    }
131
132    /// Is the symbol a nonterminal?
133    pub fn is_nonterminal(&self) -> bool {
134        for rule in self.grammar.rules() {
135            if rule.lhs() == *self {
136                return true;
137            }
138        }
139        false
140    }
141}
142
143impl<'g> Rule<'g> {
144    /// The `Grammar` backing this rule.
145    pub fn grammar(&self) -> &'g Grammar {
146        self.grammar
147    }
148
149    fn data(&self) -> &RuleData {
150        &self.grammar.rules[self.index.0]
151    }
152
153    /// The symbol on the LHS.
154    pub fn lhs(&self) -> Symbol<'g> {
155        Symbol {
156            grammar: self.grammar,
157            index: self.data().lhs,
158        }
159    }
160
161    /// The sequence of symbols on the RHS.
162    pub fn rhs(&self) -> Vec<Symbol<'g>> {
163        let mut result = vec![];
164        for symbol in &self.data().rhs {
165            result.push(Symbol {
166                grammar: self.grammar,
167                index: *symbol,
168            });
169        }
170        result
171    }
172}
173
174impl Grammar {
175    /// A builder API for creating a Grammar object.
176    pub fn new() -> GrammarBuilder {
177        GrammarBuilder {
178            symbols: vec![],
179            rules: vec![],
180        }
181    }
182
183    /// The set of symbols.
184    pub fn symbols(&self) -> Vec<Symbol> {
185        let mut result = vec![];
186        for index in 0..self.symbols.len() {
187            result.push(Symbol {
188                grammar: self,
189                index: SymbolIndex(index),
190            });
191        }
192        result
193    }
194
195    /// The set of terminal symbols.
196    pub fn terminals(&self) -> Vec<Symbol> {
197        let mut result = vec![];
198        for symbol in self.symbols() {
199            if symbol.is_terminal() {
200                result.push(symbol);
201            }
202        }
203        result
204    }
205
206    /// The set of nonterminal symbols.
207    pub fn nonterminals(&self) -> Vec<Symbol> {
208        let mut result = vec![];
209        for symbol in self.symbols() {
210            if symbol.is_nonterminal() {
211                result.push(symbol);
212            }
213        }
214        result
215    }
216
217    /// The set of rules.
218    pub fn rules(&self) -> Vec<Rule> {
219        let mut result = vec![];
220        for index in 0..self.rules.len() {
221            result.push(Rule {
222                grammar: self,
223                index: RuleIndex(index),
224            });
225        }
226        result
227    }
228
229    /// Fetch the symbol with the given name if it exists.
230    pub fn symbol<S: AsRef<str>>(&self, name: S) -> Option<Symbol> {
231        for (index, symbol) in self.symbols.iter().enumerate() {
232            if symbol.name == name.as_ref() {
233                return Some(Symbol {
234                    grammar: self,
235                    index: SymbolIndex(index),
236                });
237            }
238        }
239        None
240    }
241}
242
243impl GrammarBuilder {
244    /// Declare a symbol.
245    pub fn symbol<S: Into<String>>(mut self, name: S) -> Self {
246        let name: String = name.into();
247        assert!(!self.symbols.iter().any(|symbol_data| symbol_data.name == name), "Symbol declared twice: {name}");
248        self.symbols.push(SymbolData {
249            name,
250        });
251        self
252    }
253
254    /// Declare a rule for this grammar.
255    pub fn rule<S: AsRef<str>>(mut self, lhs: S, rhs: &[S]) -> Self {
256        let rhs: Vec<SymbolIndex> = rhs.iter().map(|symbol_name| self.symbol_index(symbol_name.as_ref())).collect();
257
258        self.rules.push(RuleData {
259            lhs: self.symbol_index(lhs.as_ref()),
260            rhs,
261        });
262        self
263    }
264
265    /// Finish building this object and return the result as a `Grammar`.
266    pub fn build(self) -> Grammar {
267        Grammar {
268            symbols: self.symbols,
269            rules: self.rules,
270        }
271    }
272
273    fn symbol_index(&self, symbol_name: &str) -> SymbolIndex {
274        for (i, symbol) in self.symbols.iter().enumerate() {
275            if symbol.name == symbol_name {
276                return SymbolIndex(i);
277            }
278        }
279        panic!("No such symbol: {symbol_name}")
280    }
281}