Skip to main content

grammar_utils/
grammar.rs

1// Offset into Grammar::symbols
2#[derive(Clone, Copy, Eq, PartialEq, Debug, Ord, PartialOrd)]
3pub struct SymbolIndex(usize);
4
5// Offset into Grammar::rules
6#[derive(Clone, Copy, Eq, PartialEq, Debug, Ord, PartialOrd)]
7pub struct RuleIndex(usize);
8
9impl From<SymbolIndex> for usize {
10    fn from(value: SymbolIndex) -> Self {
11        value.0
12    }
13}
14
15impl From<RuleIndex> for usize {
16    fn from(value: RuleIndex) -> Self {
17        value.0
18    }
19}
20
21pub struct GrammarBuilder {
22    symbols: Vec<SymbolData>,
23    rules: Vec<RuleData>,
24}
25
26/// `Grammar` represents a context-free grammar.
27///
28/// It tracks a set of symbols and a set of production rules showing how those symbols interact.
29pub struct Grammar {
30    symbols: Vec<SymbolData>,
31    rules: Vec<RuleData>,
32}
33
34// Symbols should have a name.
35// Ideally, these should be a C-style identifier.
36struct SymbolData {
37    name: String,
38}
39
40// A production rule takes a nonterminal symbol on the LHS
41// to a sequence of symbols (terminal and/or nonterminal) on the RHS
42struct RuleData {
43    lhs: SymbolIndex,
44    rhs: Vec<SymbolIndex>,
45}
46
47impl std::fmt::Debug for Grammar {
48    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
49        for rule in &self.rules() {
50            writeln!(f, "{rule:?}")?;
51        }
52        Ok(())
53    }
54}
55
56impl<'g> std::fmt::Debug for Symbol<'g> {
57    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
58        write!(f, "{}", self.name())
59    }
60}
61
62impl<'g> std::fmt::Debug for Rule<'g> {
63    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
64        let lhs = format!("{:?}", self.lhs());
65        let rhs = self.rhs().into_iter().map(|symbol| format!("{symbol:?}")).collect::<Vec<_>>().join(" ");
66        write!(f, "{lhs} -> {rhs}")
67    }
68}
69
70/// A `Symbol` is a handle to a symbol inside of a `Grammar`.
71#[derive(Clone, Copy)]
72pub struct Symbol<'g> {
73    grammar: &'g Grammar,
74    index: SymbolIndex,
75}
76
77/// A `Rule` is a handle to a rule inside of a `Grammar`.
78#[derive(Clone, Copy)]
79pub struct Rule<'g> {
80    grammar: &'g Grammar,
81    index: RuleIndex,
82}
83
84impl<'g> PartialEq for Symbol<'g> {
85    fn eq(&self, other: &Self) -> bool {
86        std::ptr::eq(self.grammar, other.grammar) && self.index == other.index
87    }
88}
89
90impl<'g> Eq for Symbol<'g> {}
91
92impl<'g> PartialEq for Rule<'g> {
93    fn eq(&self, other: &Self) -> bool {
94        std::ptr::eq(self.grammar, other.grammar) && self.index == other.index
95    }
96}
97
98impl<'g> Eq for Rule<'g> {}
99
100impl<'g> std::hash::Hash for Symbol<'g> {
101    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
102        self.index.0.hash(state)
103    }
104}
105
106impl<'g> PartialOrd for Symbol<'g> {
107    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
108        self.index.partial_cmp(&other.index)
109    }
110}
111
112impl<'g> Ord for Symbol<'g> {
113    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
114        self.index.cmp(&other.index)
115    }
116}
117
118impl<'g> std::hash::Hash for Rule<'g> {
119    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
120        self.index.0.hash(state)
121    }
122}
123
124impl<'g> PartialOrd for Rule<'g> {
125    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
126        self.index.partial_cmp(&other.index)
127    }
128}
129
130impl<'g> Ord for Rule<'g> {
131    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
132        self.index.cmp(&other.index)
133    }
134}
135
136impl<'g> Symbol<'g> {
137    /// The `Grammar` backing this symbol.
138    pub fn grammar(&self) -> &'g Grammar {
139        self.grammar
140    }
141
142    fn data(&self) -> &SymbolData {
143        &self.grammar.symbols[self.index.0]
144    }
145
146    /// The name of the symbol.
147    pub fn name(&self) -> String {
148        self.data().name.clone()
149    }
150
151    /// Is the symbol a terminal?
152    pub fn is_terminal(&self) -> bool {
153        !self.is_nonterminal()
154    }
155
156    /// Is the symbol a nonterminal?
157    pub fn is_nonterminal(&self) -> bool {
158        for rule in self.grammar.rules() {
159            if rule.lhs() == *self {
160                return true;
161            }
162        }
163        false
164    }
165}
166
167impl<'g> Rule<'g> {
168    /// The `Grammar` backing this rule.
169    pub fn grammar(&self) -> &'g Grammar {
170        self.grammar
171    }
172
173    pub fn index(&self) -> RuleIndex {
174        self.index
175    }
176
177    fn data(&self) -> &RuleData {
178        &self.grammar.rules[self.index.0]
179    }
180
181    /// The symbol on the LHS.
182    pub fn lhs(&self) -> Symbol<'g> {
183        Symbol {
184            grammar: self.grammar,
185            index: self.data().lhs,
186        }
187    }
188
189    /// The sequence of symbols on the RHS.
190    pub fn rhs(&self) -> Vec<Symbol<'g>> {
191        let mut result = vec![];
192        for symbol in &self.data().rhs {
193            result.push(Symbol {
194                grammar: self.grammar,
195                index: *symbol,
196            });
197        }
198        result
199    }
200}
201
202impl Grammar {
203    /// A builder API for creating a Grammar object.
204    pub fn new() -> GrammarBuilder {
205        GrammarBuilder {
206            symbols: vec![],
207            rules: vec![],
208        }
209    }
210
211    /// The set of symbols.
212    pub fn symbols(&self) -> Vec<Symbol> {
213        let mut result = vec![];
214        for index in 0..self.symbols.len() {
215            result.push(Symbol {
216                grammar: self,
217                index: SymbolIndex(index),
218            });
219        }
220        result
221    }
222
223    /// The set of terminal symbols.
224    pub fn terminals(&self) -> Vec<Symbol> {
225        let mut result = vec![];
226        for symbol in self.symbols() {
227            if symbol.is_terminal() {
228                result.push(symbol);
229            }
230        }
231        result
232    }
233
234    /// The set of nonterminal symbols.
235    pub fn nonterminals(&self) -> Vec<Symbol> {
236        let mut result = vec![];
237        for symbol in self.symbols() {
238            if symbol.is_nonterminal() {
239                result.push(symbol);
240            }
241        }
242        result
243    }
244
245    /// The set of rules.
246    pub fn rules(&self) -> Vec<Rule> {
247        let mut result = vec![];
248        for index in 0..self.rules.len() {
249            result.push(Rule {
250                grammar: self,
251                index: RuleIndex(index),
252            });
253        }
254        result
255    }
256
257    /// Fetch the symbol with the given name if it exists.
258    pub fn symbol<S: AsRef<str>>(&self, name: S) -> Option<Symbol> {
259        for (index, symbol) in self.symbols.iter().enumerate() {
260            if symbol.name == name.as_ref() {
261                return Some(Symbol {
262                    grammar: self,
263                    index: SymbolIndex(index),
264                });
265            }
266        }
267        None
268    }
269}
270
271impl GrammarBuilder {
272    /// Declare a symbol.
273    pub fn symbol<S: Into<String>>(mut self, name: S) -> Self {
274        let name: String = name.into();
275        assert!(!self.symbols.iter().any(|symbol_data| symbol_data.name == name), "Symbol declared twice: {name}");
276        self.symbols.push(SymbolData {
277            name,
278        });
279        self
280    }
281
282    /// Declare a rule for this grammar.
283    pub fn rule<S: AsRef<str>>(mut self, lhs: S, rhs: &[S]) -> Self {
284        let rhs: Vec<SymbolIndex> = rhs.iter().map(|symbol_name| self.symbol_index(symbol_name.as_ref())).collect();
285
286        self.rules.push(RuleData {
287            lhs: self.symbol_index(lhs.as_ref()),
288            rhs,
289        });
290        self
291    }
292
293    /// Finish building this object and return the result as a `Grammar`.
294    pub fn build(self) -> Grammar {
295        Grammar {
296            symbols: self.symbols,
297            rules: self.rules,
298        }
299    }
300
301    fn symbol_index(&self, symbol_name: &str) -> SymbolIndex {
302        for (i, symbol) in self.symbols.iter().enumerate() {
303            if symbol.name == symbol_name {
304                return SymbolIndex(i);
305            }
306        }
307        panic!("No such symbol: {symbol_name}")
308    }
309}