rustemo_compiler/grammar/
mod.rs1use std::{
2 cell::Cell,
3 collections::BTreeMap,
4 fmt::Display,
5 hash::{Hash, Hasher},
6 str::FromStr,
7};
8
9use rustemo::{Error, Parser, Result};
10
11use crate::{
12 index::{
13 NonTermIndex, NonTermVec, ProdIndex, ProdVec, SymbolIndex, SymbolVec, TermIndex, TermVec,
14 },
15 lang::{rustemo::RustemoParser, rustemo_actions::Name},
16};
17
18use self::builder::GrammarBuilder;
19
20use super::lang::rustemo_actions::{
21 GrammarSymbol, Imports, ProdMetaDatas, Recognizer, TermMetaDatas,
22};
23
24pub(crate) mod builder;
25#[cfg(test)]
26mod tests;
27pub(crate) mod types;
28
29#[derive(Debug)]
30pub struct Grammar {
31 pub imports: Imports,
32 pub productions: ProdVec<Production>,
33
34 pub terminals: TermVec<Terminal>,
35 pub nonterminals: NonTermVec<NonTerminal>,
36 pub nonterm_by_name: BTreeMap<String, SymbolIndex>,
37 pub term_by_name: BTreeMap<String, SymbolIndex>,
38 pub empty_index: SymbolIndex,
40 pub stop_index: SymbolIndex,
42 pub augmented_index: SymbolIndex,
44 pub augmented_layout_index: Option<SymbolIndex>,
46 pub start_index: SymbolIndex,
48}
49
50macro_rules! grammar_elem {
51 ($name:ident) => {
52 impl PartialEq for $name {
53 fn eq(&self, other: &Self) -> bool {
54 self.idx == other.idx
55 }
56 }
57 impl Eq for $name {}
58 impl Hash for $name {
59 fn hash<H: Hasher>(&self, state: &mut H) {
60 self.idx.hash(state);
61 }
62 }
63
64 impl PartialOrd for $name {
65 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
66 Some(self.cmp(other))
67 }
68 }
69
70 impl Ord for $name {
71 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
72 self.idx.cmp(&other.idx)
73 }
74 }
75 };
76}
77
78#[derive(Debug, Default)]
79pub struct Terminal {
80 pub idx: TermIndex,
81 pub name: String,
82 pub annotation: Option<String>,
83 pub recognizer: Option<Recognizer>,
84
85 pub has_content: bool,
88
89 pub reachable: Cell<bool>,
92
93 pub prio: Priority,
95
96 pub assoc: Associativity,
98
99 pub meta: TermMetaDatas,
100}
101grammar_elem!(Terminal);
102
103#[derive(Debug, Default)]
104pub struct NonTerminal {
105 pub idx: NonTermIndex,
106 pub name: String,
107 pub annotation: Option<String>,
108 pub productions: Vec<ProdIndex>,
109
110 pub reachable: Cell<bool>,
113}
114grammar_elem!(NonTerminal);
115
116impl NonTerminal {
117 #[inline]
118 pub fn productions<'a>(&self, grammar: &'a Grammar) -> Vec<&'a Production> {
119 self.productions
120 .iter()
121 .map(|&idx| &grammar.productions[idx])
122 .collect()
123 }
124}
125
126impl Display for Grammar {
127 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
128 writeln!(f, "\nGRAMMAR [")?;
129 writeln!(f, "\nTerminals:")?;
130 for terminal in &self.terminals {
131 writeln!(f, "{}. {}", terminal.idx, terminal.name)?;
132 }
133 writeln!(f, "\nNonTerminals:")?;
134 for nonterminal in &self.nonterminals {
135 writeln!(
136 f,
137 "{} ({}). {}",
138 nonterminal.idx,
139 self.nonterm_to_symbol_index(nonterminal.idx),
140 nonterminal.name
141 )?;
142 }
143 writeln!(f, "\nProductions:")?;
144 for production in &self.productions {
145 write!(
146 f,
147 "{}. {}: ",
148 production.idx, self.nonterminals[production.nonterminal].name
149 )?;
150 for assignment in &production.rhs {
151 write!(f, "{} ", self.symbol_name(res_symbol(assignment)))?;
152 }
153 writeln!(f)?;
154 }
155
156 writeln!(f, "\n] GRAMMAR")
157 }
158}
159
160#[derive(Debug, Default, PartialEq, Eq)]
161pub enum Associativity {
162 #[default]
163 None,
164 Left,
165 Right,
166}
167
168pub type Priority = u32;
169pub const DEFAULT_PRIORITY: u32 = 10;
170
171#[derive(Debug)]
172pub struct Production {
173 pub idx: ProdIndex,
174 pub nonterminal: NonTermIndex,
175 pub ntidx: usize,
176 pub kind: Option<String>,
177 pub rhs: Vec<ResolvingAssignment>,
178 pub assoc: Associativity,
179 pub prio: Priority,
180 pub dynamic: bool,
181 pub nops: bool,
182 pub nopse: bool,
183 pub meta: ProdMetaDatas,
184}
185grammar_elem!(Production);
186
187impl Default for Production {
188 fn default() -> Self {
189 Self {
190 idx: ProdIndex(usize::MAX),
192 nonterminal: NonTermIndex(usize::MAX),
193
194 ntidx: 0,
195 kind: None,
196 rhs: Default::default(),
197 assoc: Default::default(),
198 prio: DEFAULT_PRIORITY,
199 dynamic: Default::default(),
200 nops: Default::default(),
201 nopse: Default::default(),
202 meta: Default::default(),
203 }
204 }
205}
206
207impl Production {
208 #[inline]
209 pub fn rhs_symbols(&self) -> Vec<SymbolIndex> {
210 self.rhs.iter().map(res_symbol).collect()
211 }
212
213 #[inline]
215 pub fn rhs_assign(&self) -> Vec<Assignment> {
216 self.rhs
217 .iter()
218 .enumerate()
219 .map(|(idx, a)| Assignment {
220 name: a.name.clone(),
221 symbol: res_symbol(a),
222 is_bool: a.is_bool,
223 idx,
224 })
225 .collect()
226 }
227
228 pub fn rhs_with_content(&self, grammar: &Grammar) -> Vec<Assignment> {
231 self.rhs_assign()
232 .into_iter()
233 .filter(|a| grammar.symbol_has_content(a.symbol))
234 .collect::<Vec<_>>()
235 }
236
237 #[inline]
238 pub fn rhs_symbol(&self, pos: usize) -> SymbolIndex {
239 res_symbol(&self.rhs[pos])
240 }
241
242 #[inline]
243 pub fn nonterminal<'a>(&self, grammar: &'a Grammar) -> &'a NonTerminal {
244 &grammar.nonterminals[self.nonterminal]
245 }
246
247 pub fn to_string(&self, grammar: &Grammar) -> String {
248 format!(
249 "{}: {}",
250 self.nonterminal(grammar).name,
251 grammar.symbol_names(self.rhs_symbols()).join(" ")
252 )
253 }
254}
255
256impl Display for Production {
257 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
258 write!(f, "{}:", self.idx)?;
259 for assign in &self.rhs {
260 write!(f, " ")?;
261 if assign.name.is_some() {
262 write!(f, "{}=", assign.name.as_ref().unwrap())?;
263 }
264 write!(
265 f,
266 "{}",
267 match &assign.symbol.symbol {
268 GrammarSymbol::Name(name) => name.as_ref().into(),
269 GrammarSymbol::StrConst(mtch) => {
270 format!("\"{}\"", mtch.as_ref())
271 }
272 }
273 )?;
274 }
275 Ok(())
276 }
277}
278
279#[derive(Debug)]
280pub struct ResolvingSymbolIndex {
281 index: Option<SymbolIndex>,
282 symbol: GrammarSymbol,
283}
284
285#[derive(Debug)]
286pub struct ResolvingAssignment {
287 pub name: Option<Name>,
288 pub symbol: ResolvingSymbolIndex,
289 pub is_bool: bool,
290}
291
292#[derive(Debug)]
293pub struct Assignment {
294 pub name: Option<Name>,
295 pub symbol: SymbolIndex,
296 pub is_bool: bool,
298 pub idx: usize,
300}
301
302#[inline]
304pub(crate) fn res_symbol(assign: &ResolvingAssignment) -> SymbolIndex {
305 assign
306 .symbol
307 .index
308 .unwrap_or_else(|| panic!("Unresolved symbol {:?}", &assign.symbol.symbol))
309}
310
311impl FromStr for Grammar {
322 type Err = Error;
323
324 fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
325 Grammar::from_string(s)
326 }
327}
328
329impl Grammar {
330 fn from_string<G: AsRef<str>>(grammar_str: G) -> Result<Self> {
332 GrammarBuilder::new()
333 .try_from_file(RustemoParser::new().parse(grammar_str.as_ref())?, None)
334 }
335
336 pub(crate) fn new_termvec<T: Clone>(&self, default: T) -> TermVec<T> {
350 TermVec(vec![default; self.terminals.len()])
351 }
352
353 pub(crate) fn new_nontermvec<T: Clone>(&self, default: T) -> NonTermVec<T> {
354 NonTermVec(vec![default; self.nonterminals.len()])
355 }
356
357 pub fn symbol_index(&self, name: &str) -> SymbolIndex {
358 *self.term_by_name.get(name).unwrap_or_else(|| {
359 self.nonterm_by_name.get(name).unwrap_or_else(|| {
360 panic!("No Symbol by name {name:?}");
361 })
362 })
363 }
364
365 pub fn symbol_name(&self, index: SymbolIndex) -> String {
366 if index.0 < self.terminals.len() {
367 self.symbol_to_term(index).name.clone()
368 } else {
369 self.symbol_to_nonterm(index).name.clone()
370 }
371 }
372
373 #[inline]
376 pub fn symbol_has_content(&self, symbol: SymbolIndex) -> bool {
377 !self.is_empty(symbol)
378 && (self.is_nonterm(symbol) || self.symbol_to_term(symbol).has_content)
379 }
380
381 pub fn symbol_indexes(&self, names: &[&str]) -> SymbolVec<SymbolIndex> {
382 let mut indexes = SymbolVec::new();
383 for name in names {
384 indexes.push(self.symbol_index(name))
385 }
386 indexes
387 }
388
389 #[inline]
390 pub fn symbol_names<T>(&self, indexes: T) -> Vec<String>
391 where
392 T: IntoIterator<Item = SymbolIndex>,
393 {
394 indexes.into_iter().map(|i| self.symbol_name(i)).collect()
395 }
396
397 #[inline]
398 pub fn term_to_symbol_index(&self, index: TermIndex) -> SymbolIndex {
399 SymbolIndex(index.0)
400 }
401
402 #[inline]
404 pub fn symbol_to_term_index(&self, index: SymbolIndex) -> TermIndex {
405 TermIndex(index.0)
406 }
407
408 #[inline]
410 pub fn symbol_to_term(&self, index: SymbolIndex) -> &Terminal {
411 &self.terminals[self.symbol_to_term_index(index)]
412 }
413
414 #[inline]
416 pub fn term_by_name(&self, name: &str) -> &Terminal {
417 self.symbol_to_term(self.symbol_index(name))
418 }
419
420 #[inline]
422 pub fn term_by_index(&self, index: TermIndex) -> &Terminal {
423 self.symbol_to_term(self.term_to_symbol_index(index))
424 }
425
426 #[inline]
427 pub fn nonterm_to_symbol_index(&self, index: NonTermIndex) -> SymbolIndex {
428 SymbolIndex(index.0 + self.terminals.len())
429 }
430
431 #[inline]
434 pub fn symbol_to_nonterm_index(&self, index: SymbolIndex) -> NonTermIndex {
435 NonTermIndex(index.0.checked_sub(self.terminals.len()).unwrap())
436 }
437
438 #[inline]
441 pub fn symbol_to_nonterm(&self, index: SymbolIndex) -> &NonTerminal {
442 &self.nonterminals[NonTermIndex(index.0.checked_sub(self.terminals.len()).unwrap())]
443 }
444
445 #[inline]
447 pub fn nonterm_by_name(&self, name: &str) -> &NonTerminal {
448 self.symbol_to_nonterm(self.symbol_index(name))
449 }
450
451 #[inline]
453 pub fn nonterm_by_index(&self, index: NonTermIndex) -> &NonTerminal {
454 self.symbol_to_nonterm(self.nonterm_to_symbol_index(index))
455 }
456
457 #[inline]
458 pub fn is_nonterm(&self, index: SymbolIndex) -> bool {
459 index.0 >= self.terminals.len()
460 }
461
462 #[inline]
463 pub fn is_term(&self, index: SymbolIndex) -> bool {
464 index.0 < self.terminals.len()
465 }
466
467 #[inline]
468 pub fn is_empty(&self, index: SymbolIndex) -> bool {
469 index == self.empty_index
470 }
471
472 #[inline]
473 pub fn production_len(&self, prod: ProdIndex) -> usize {
474 self.productions[prod].rhs.len()
475 }
476
477 #[inline]
478 pub fn production_rhs_symbols(&self, prod: ProdIndex) -> Vec<SymbolIndex> {
479 self.productions[prod].rhs.iter().map(res_symbol).collect()
480 }
481
482 pub fn productions(&self) -> Vec<&Production> {
484 self.productions
485 .iter()
486 .filter(|&p| {
487 let nt_symbol = self.nonterm_to_symbol_index(p.nonterminal);
488 nt_symbol != self.augmented_index && self.augmented_layout_index != Some(nt_symbol)
489 })
490 .collect()
491 }
492
493 pub fn nonterminals(&self) -> Vec<&NonTerminal> {
495 self.nonterminals
496 .iter()
497 .filter(|&n| {
498 let nt_symbol = self.nonterm_to_symbol_index(n.idx);
499 nt_symbol != self.empty_index
500 && nt_symbol != self.augmented_index
501 && self.augmented_layout_index != Some(nt_symbol)
502 })
503 .collect()
504 }
505
506 #[inline]
507 pub fn is_enum(&self, nonterminal: &NonTerminal) -> bool {
508 let prods = nonterminal.productions(self);
509 prods.iter().filter(|x| x.rhs.len() == 1).count() == prods.len()
510 }
511
512 #[inline]
513 pub fn has_layout(&self) -> bool {
514 self.augmented_layout_index.is_some()
515 }
516}