1mod expand_repeats;
2mod expand_tokens;
3mod extract_default_aliases;
4mod extract_tokens;
5mod flatten_grammar;
6mod intern_symbols;
7mod process_inlines;
8
9use std::{
10 cmp::Ordering,
11 collections::{hash_map, BTreeSet, HashMap, HashSet},
12 mem,
13};
14
15pub use expand_tokens::ExpandTokensError;
16pub use extract_tokens::ExtractTokensError;
17pub use flatten_grammar::FlattenGrammarError;
18use indexmap::IndexMap;
19pub use intern_symbols::InternSymbolsError;
20pub use process_inlines::ProcessInlinesError;
21use serde::Serialize;
22use thiserror::Error;
23
24pub use self::expand_tokens::expand_tokens;
25use self::{
26 expand_repeats::expand_repeats, extract_default_aliases::extract_default_aliases,
27 extract_tokens::extract_tokens, flatten_grammar::flatten_grammar,
28 intern_symbols::intern_symbols, process_inlines::process_inlines,
29};
30use super::{
31 grammars::{
32 ExternalToken, InlinedProductionMap, InputGrammar, LexicalGrammar, PrecedenceEntry,
33 SyntaxGrammar, Variable,
34 },
35 rules::{AliasMap, Precedence, Rule, Symbol},
36};
37use crate::grammars::ReservedWordContext;
38
39pub struct IntermediateGrammar<T, U> {
40 variables: Vec<Variable>,
41 extra_symbols: Vec<T>,
42 expected_conflicts: Vec<Vec<Symbol>>,
43 precedence_orderings: Vec<Vec<PrecedenceEntry>>,
44 external_tokens: Vec<U>,
45 variables_to_inline: Vec<Symbol>,
46 supertype_symbols: Vec<Symbol>,
47 word_token: Option<Symbol>,
48 reserved_word_sets: Vec<ReservedWordContext<T>>,
49}
50
51pub type InternedGrammar = IntermediateGrammar<Rule, Variable>;
52
53pub type ExtractedSyntaxGrammar = IntermediateGrammar<Symbol, ExternalToken>;
54
55#[derive(Debug, PartialEq, Eq)]
56pub struct ExtractedLexicalGrammar {
57 pub variables: Vec<Variable>,
58 pub separators: Vec<Rule>,
59}
60
61impl<T, U> Default for IntermediateGrammar<T, U> {
62 fn default() -> Self {
63 Self {
64 variables: Vec::default(),
65 extra_symbols: Vec::default(),
66 expected_conflicts: Vec::default(),
67 precedence_orderings: Vec::default(),
68 external_tokens: Vec::default(),
69 variables_to_inline: Vec::default(),
70 supertype_symbols: Vec::default(),
71 word_token: Option::default(),
72 reserved_word_sets: Vec::default(),
73 }
74 }
75}
76
77pub type PrepareGrammarResult<T> = Result<T, PrepareGrammarError>;
78
79#[derive(Debug, Error, Serialize)]
80#[error(transparent)]
81pub enum PrepareGrammarError {
82 ValidatePrecedences(#[from] ValidatePrecedenceError),
83 ValidateIndirectRecursion(#[from] IndirectRecursionError),
84 InternSymbols(#[from] InternSymbolsError),
85 ExtractTokens(#[from] ExtractTokensError),
86 FlattenGrammar(#[from] FlattenGrammarError),
87 ExpandTokens(#[from] ExpandTokensError),
88 ProcessInlines(#[from] ProcessInlinesError),
89}
90
91pub type ValidatePrecedenceResult<T> = Result<T, ValidatePrecedenceError>;
92
93#[derive(Debug, Error, Serialize)]
94#[error(transparent)]
95pub enum ValidatePrecedenceError {
96 Undeclared(#[from] UndeclaredPrecedenceError),
97 Ordering(#[from] ConflictingPrecedenceOrderingError),
98}
99
100#[derive(Debug, Error, Serialize)]
101pub struct IndirectRecursionError(pub Vec<String>);
102
103impl std::fmt::Display for IndirectRecursionError {
104 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
105 write!(f, "Grammar contains an indirectly recursive rule: ")?;
106 for (i, symbol) in self.0.iter().enumerate() {
107 if i > 0 {
108 write!(f, " -> ")?;
109 }
110 write!(f, "{symbol}")?;
111 }
112 Ok(())
113 }
114}
115
116#[derive(Debug, Error, Serialize)]
117pub struct UndeclaredPrecedenceError {
118 pub precedence: String,
119 pub rule: String,
120}
121
122impl std::fmt::Display for UndeclaredPrecedenceError {
123 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
124 write!(
125 f,
126 "Undeclared precedence '{}' in rule '{}'",
127 self.precedence, self.rule
128 )?;
129 Ok(())
130 }
131}
132
133#[derive(Debug, Error, Serialize)]
134pub struct ConflictingPrecedenceOrderingError {
135 pub precedence_1: String,
136 pub precedence_2: String,
137}
138
139impl std::fmt::Display for ConflictingPrecedenceOrderingError {
140 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
141 write!(
142 f,
143 "Conflicting orderings for precedences {} and {}",
144 self.precedence_1, self.precedence_2
145 )?;
146 Ok(())
147 }
148}
149
150pub fn prepare_grammar(
153 input_grammar: &InputGrammar,
154) -> PrepareGrammarResult<(
155 SyntaxGrammar,
156 LexicalGrammar,
157 InlinedProductionMap,
158 AliasMap,
159)> {
160 validate_precedences(input_grammar)?;
161 validate_indirect_recursion(input_grammar)?;
162
163 let interned_grammar = intern_symbols(input_grammar)?;
164 let (syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?;
165 let syntax_grammar = expand_repeats(syntax_grammar);
166 let mut syntax_grammar = flatten_grammar(syntax_grammar)?;
167 let lexical_grammar = expand_tokens(lexical_grammar)?;
168 let default_aliases = extract_default_aliases(&mut syntax_grammar, &lexical_grammar);
169 let inlines = process_inlines(&syntax_grammar, &lexical_grammar)?;
170 Ok((syntax_grammar, lexical_grammar, inlines, default_aliases))
171}
172
173fn validate_indirect_recursion(grammar: &InputGrammar) -> Result<(), IndirectRecursionError> {
177 let mut epsilon_transitions: IndexMap<&str, BTreeSet<String>> = IndexMap::new();
178
179 for variable in &grammar.variables {
180 let productions = get_single_symbol_productions(&variable.rule);
181 let filtered: BTreeSet<String> = productions
184 .into_iter()
185 .filter(|s| s != &variable.name)
186 .collect();
187 epsilon_transitions.insert(variable.name.as_str(), filtered);
188 }
189
190 for start_symbol in epsilon_transitions.keys() {
191 let mut visited = BTreeSet::new();
192 let mut path = Vec::new();
193 if let Some((start_idx, end_idx)) =
194 get_cycle(start_symbol, &epsilon_transitions, &mut visited, &mut path)
195 {
196 let cycle_symbols = path[start_idx..=end_idx]
197 .iter()
198 .map(|s| (*s).to_string())
199 .collect();
200 return Err(IndirectRecursionError(cycle_symbols));
201 }
202 }
203
204 Ok(())
205}
206
207fn get_single_symbol_productions(rule: &Rule) -> BTreeSet<String> {
208 match rule {
209 Rule::NamedSymbol(name) => BTreeSet::from([name.clone()]),
210 Rule::Choice(choices) => choices
211 .iter()
212 .flat_map(get_single_symbol_productions)
213 .collect(),
214 Rule::Metadata { rule, .. } => get_single_symbol_productions(rule),
215 _ => BTreeSet::new(),
216 }
217}
218
219fn get_cycle<'a>(
221 current: &'a str,
222 transitions: &'a IndexMap<&'a str, BTreeSet<String>>,
223 visited: &mut BTreeSet<&'a str>,
224 path: &mut Vec<&'a str>,
225) -> Option<(usize, usize)> {
226 if let Some(first_idx) = path.iter().position(|s| *s == current) {
227 path.push(current);
228 return Some((first_idx, path.len() - 1));
229 }
230
231 if visited.contains(current) {
232 return None;
233 }
234
235 path.push(current);
236 visited.insert(current);
237
238 if let Some(next_symbols) = transitions.get(current) {
239 for next in next_symbols {
240 if let Some(cycle) = get_cycle(next, transitions, visited, path) {
241 return Some(cycle);
242 }
243 }
244 }
245
246 path.pop();
247 None
248}
249
250fn validate_precedences(grammar: &InputGrammar) -> ValidatePrecedenceResult<()> {
254 fn validate(
257 rule_name: &str,
258 rule: &Rule,
259 names: &HashSet<&String>,
260 ) -> ValidatePrecedenceResult<()> {
261 match rule {
262 Rule::Repeat(rule) => validate(rule_name, rule, names),
263 Rule::Seq(elements) | Rule::Choice(elements) => elements
264 .iter()
265 .try_for_each(|e| validate(rule_name, e, names)),
266 Rule::Metadata { rule, params } => {
267 if let Precedence::Name(n) = ¶ms.precedence {
268 if !names.contains(n) {
269 Err(UndeclaredPrecedenceError {
270 precedence: n.clone(),
271 rule: rule_name.to_string(),
272 })?;
273 }
274 }
275 validate(rule_name, rule, names)?;
276 Ok(())
277 }
278 _ => Ok(()),
279 }
280 }
281
282 let mut pairs = HashMap::new();
285 for list in &grammar.precedence_orderings {
286 for (i, mut entry1) in list.iter().enumerate() {
287 for mut entry2 in list.iter().skip(i + 1) {
288 if entry2 == entry1 {
289 continue;
290 }
291 let mut ordering = Ordering::Greater;
292 if entry1 > entry2 {
293 ordering = Ordering::Less;
294 mem::swap(&mut entry1, &mut entry2);
295 }
296 match pairs.entry((entry1, entry2)) {
297 hash_map::Entry::Vacant(e) => {
298 e.insert(ordering);
299 }
300 hash_map::Entry::Occupied(e) => {
301 if e.get() != &ordering {
302 Err(ConflictingPrecedenceOrderingError {
303 precedence_1: entry1.to_string(),
304 precedence_2: entry2.to_string(),
305 })?;
306 }
307 }
308 }
309 }
310 }
311 }
312
313 let precedence_names = grammar
314 .precedence_orderings
315 .iter()
316 .flat_map(|l| l.iter())
317 .filter_map(|p| {
318 if let PrecedenceEntry::Name(n) = p {
319 Some(n)
320 } else {
321 None
322 }
323 })
324 .collect::<HashSet<&String>>();
325 for variable in &grammar.variables {
326 validate(&variable.name, &variable.rule, &precedence_names)?;
327 }
328
329 Ok(())
330}
331
332#[cfg(test)]
333mod tests {
334 use super::*;
335 use crate::grammars::VariableType;
336
337 #[test]
338 fn test_validate_precedences_with_undeclared_precedence() {
339 let grammar = InputGrammar {
340 precedence_orderings: vec![
341 vec![
342 PrecedenceEntry::Name("a".to_string()),
343 PrecedenceEntry::Name("b".to_string()),
344 ],
345 vec![
346 PrecedenceEntry::Name("b".to_string()),
347 PrecedenceEntry::Name("c".to_string()),
348 PrecedenceEntry::Name("d".to_string()),
349 ],
350 ],
351 variables: vec![
352 Variable {
353 name: "v1".to_string(),
354 kind: VariableType::Named,
355 rule: Rule::Seq(vec![
356 Rule::prec_left(Precedence::Name("b".to_string()), Rule::string("w")),
357 Rule::prec(Precedence::Name("c".to_string()), Rule::string("x")),
358 ]),
359 },
360 Variable {
361 name: "v2".to_string(),
362 kind: VariableType::Named,
363 rule: Rule::repeat(Rule::Choice(vec![
364 Rule::prec_left(Precedence::Name("omg".to_string()), Rule::string("y")),
365 Rule::prec(Precedence::Name("c".to_string()), Rule::string("z")),
366 ])),
367 },
368 ],
369 ..Default::default()
370 };
371
372 let result = validate_precedences(&grammar);
373 assert_eq!(
374 result.unwrap_err().to_string(),
375 "Undeclared precedence 'omg' in rule 'v2'",
376 );
377 }
378
379 #[test]
380 fn test_validate_precedences_with_conflicting_order() {
381 let grammar = InputGrammar {
382 precedence_orderings: vec![
383 vec![
384 PrecedenceEntry::Name("a".to_string()),
385 PrecedenceEntry::Name("b".to_string()),
386 ],
387 vec![
388 PrecedenceEntry::Name("b".to_string()),
389 PrecedenceEntry::Name("c".to_string()),
390 PrecedenceEntry::Name("a".to_string()),
391 ],
392 ],
393 variables: vec![
394 Variable {
395 name: "v1".to_string(),
396 kind: VariableType::Named,
397 rule: Rule::Seq(vec![
398 Rule::prec_left(Precedence::Name("b".to_string()), Rule::string("w")),
399 Rule::prec(Precedence::Name("c".to_string()), Rule::string("x")),
400 ]),
401 },
402 Variable {
403 name: "v2".to_string(),
404 kind: VariableType::Named,
405 rule: Rule::repeat(Rule::Choice(vec![
406 Rule::prec_left(Precedence::Name("a".to_string()), Rule::string("y")),
407 Rule::prec(Precedence::Name("c".to_string()), Rule::string("z")),
408 ])),
409 },
410 ],
411 ..Default::default()
412 };
413
414 let result = validate_precedences(&grammar);
415 assert_eq!(
416 result.unwrap_err().to_string(),
417 "Conflicting orderings for precedences 'a' and 'b'",
418 );
419 }
420}