1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
use super::utils::{
ParsedRhs, parse_inference_rule, parse_nonterminal, parse_production, parse_rhs,
};
use crate::logic::grammar::{Grammar, Production, TypingRule};
impl Grammar {
    /// Parse the textual specification into a `Grammar`.
    ///
    /// The input is processed as blocks separated by blank (or whitespace-only)
    /// lines. Every line is trimmed, and lines starting with `//` are ignored
    /// (they do not end a block). A block in which any line contains `::=` is
    /// read as one or more productions, each optionally continued by the
    /// directly following lines that start with `|`. Every other block is
    /// passed as a whole to `parse_inference_rule` and registered as a typing
    /// rule.
    ///
    /// If the grammar has no start symbol once all blocks are processed, the
    /// start symbol is set to the last entry in the first-seen order of
    /// production left-hand sides.
    ///
    /// # Errors
    ///
    /// Returns the first error reported by `parse_production`,
    /// `parse_nonterminal`, `parse_rhs`, `parse_inference_rule`,
    /// `Grammar::set_nonterminal_rule` or `TypingRule::new`.
    pub fn load(input: &str) -> Result<Grammar, String> {
        let mut grammar = Grammar::new();
        // First-seen order of production left-hand sides; its last entry is the
        // fallback start symbol, which keeps the choice deterministic.
        let mut nt_order: Vec<String> = Vec::new();

        // Group the significant lines into blocks. Lines are borrowed straight
        // from `input`, so no intermediate per-block `String` is needed.
        let mut blocks: Vec<Vec<&str>> = Vec::new();
        let mut current: Vec<&str> = Vec::new();
        for raw_line in input.lines() {
            let line = raw_line.trim();
            if line.is_empty() {
                // A blank line closes the block collected so far (if any).
                if !current.is_empty() {
                    blocks.push(std::mem::take(&mut current));
                }
            } else if !line.starts_with("//") {
                current.push(line);
            }
        }
        if !current.is_empty() {
            blocks.push(current);
        }

        for lines in blocks {
            // A block without any `::=` is a single inference rule.
            if !lines.iter().any(|line| line.contains("::=")) {
                let (premises, conclusion, name) = parse_inference_rule(&lines)?;
                grammar.add_typing_rule(TypingRule::new(premises, conclusion, name)?);
                continue;
            }

            // Production block - may contain multiple productions.
            let mut remaining = lines.iter().copied().peekable();
            while let Some(line) = remaining.next() {
                if !line.contains("::=") {
                    // NOTE(review): lines in a production block that neither
                    // start a production nor continue one are silently
                    // skipped; this mirrors the long-standing behaviour.
                    continue;
                }

                // Gather the production head and its `|` continuation lines
                // into one space-separated string.
                let mut production_str = String::from(line);
                while let Some(continuation) = remaining.next_if(|next| next.starts_with('|')) {
                    production_str.push(' ');
                    production_str.push_str(continuation);
                }

                let (lhs_str, rhs_str) = parse_production(&production_str)?;
                let (name, rule_name) = parse_nonterminal(&lhs_str)?;
                let ParsedRhs {
                    alternatives,
                    literal_tokens,
                } = parse_rhs(&rhs_str)?;

                // Record first time we see this nonterminal (declaration order).
                if !nt_order.contains(&name) {
                    nt_order.push(name.clone());
                }

                for literal in literal_tokens {
                    grammar.add_special(literal);
                }

                if let Some(rule_name) = rule_name {
                    grammar.set_nonterminal_rule(name.clone(), rule_name)?;
                }

                // One production per alternative of the right-hand side.
                for alt_symbols in alternatives {
                    grammar.add_production(name.clone(), Production { rhs: alt_symbols });
                }
            }
        }

        // By convention, fall back to the last entry of the first-seen order
        // when no start symbol has been set.
        if grammar.start().is_none()
            && let Some(last) = nt_order.last()
        {
            grammar.set_start(last.clone());
        }

        // Add synthetic typing rules for unary nonterminal wrappers.
        grammar = super::fill::fill(grammar);
        // Build the binding map.
        grammar.build_bindings();
        // Prepare the tokenizer regexes.
        grammar.build_tokenizer();

        Ok(grammar)
    }
}