1use crate::ir::{Combinator, RuleDef};
8use std::collections::{HashMap, HashSet};
9
10#[derive(Debug, Clone)]
12pub enum ValidationError {
13 NullableLoop {
15 rule_name: String,
16 description: String,
17 },
18 LeftRecursion {
20 rule_name: String,
21 path: Vec<String>,
22 },
23}
24
25impl std::fmt::Display for ValidationError {
26 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
27 match self {
28 ValidationError::NullableLoop {
29 rule_name,
30 description,
31 } => {
32 write!(f, "Nullable loop in rule '{}': {}", rule_name, description)
33 }
34 ValidationError::LeftRecursion { rule_name, path } => {
35 write!(
36 f,
37 "Left recursion in rule '{}': {}",
38 rule_name,
39 path.join(" -> ")
40 )
41 }
42 }
43 }
44}
45
46pub fn validate_grammar(rules: &[RuleDef]) -> Vec<ValidationError> {
48 let mut errors = Vec::new();
49
50 let rule_map: HashMap<&str, &Combinator> = rules
52 .iter()
53 .map(|r| (r.name.as_str(), &r.combinator))
54 .collect();
55
56 for rule in rules {
58 check_nullable_loops(&rule.name, &rule.combinator, &rule_map, &mut errors);
60
61 check_left_recursion(&rule.name, &rule.combinator, &rule_map, &mut errors);
63 }
64
65 errors
66}
67
68pub fn is_nullable(
70 comb: &Combinator,
71 rule_map: &HashMap<&str, &Combinator>,
72 visited: &mut HashSet<String>,
73) -> bool {
74 match comb {
75 Combinator::Literal(s) => s.is_empty(), Combinator::Char(_) => false,
78 Combinator::CharClass(_) => false,
79 Combinator::CharRange(_, _) => false,
80 Combinator::AnyChar => false,
81
82 Combinator::NotFollowedBy(_) => true,
84 Combinator::FollowedBy(_) => true,
85
86 Combinator::Capture(inner) => is_nullable(inner, rule_map, visited),
88
89 Combinator::Rule(name) => {
91 if visited.contains(name) {
92 return false; }
94 visited.insert(name.clone());
95 if let Some(rule_comb) = rule_map.get(name.as_str()) {
96 is_nullable(rule_comb, rule_map, visited)
97 } else {
98 false
99 }
100 }
101
102 Combinator::Sequence(items) => items
104 .iter()
105 .all(|item| is_nullable(item, rule_map, visited)),
106 Combinator::Choice(items) => items
107 .iter()
108 .any(|item| is_nullable(item, rule_map, visited)),
109 Combinator::ZeroOrMore(_) => true,
110 Combinator::OneOrMore(inner) => is_nullable(inner, rule_map, visited),
111 Combinator::Optional(_) => true,
112 Combinator::Skip(inner) => is_nullable(inner, rule_map, visited),
113 Combinator::SeparatedBy { .. } => false, Combinator::Pratt(pratt) => {
115 if let Some(ref operand) = *pratt.operand {
116 is_nullable(operand, rule_map, visited)
117 } else {
118 false
119 }
120 }
121 Combinator::Mapped { inner, .. } | Combinator::Memoize { inner, .. } => {
122 is_nullable(inner, rule_map, visited)
123 }
124 }
125}
126
127fn check_nullable_loops(
129 rule_name: &str,
130 comb: &Combinator,
131 rule_map: &HashMap<&str, &Combinator>,
132 errors: &mut Vec<ValidationError>,
133) {
134 match comb {
135 Combinator::ZeroOrMore(inner) | Combinator::OneOrMore(inner) => {
136 let mut visited = HashSet::new();
137 if is_nullable(inner, rule_map, &mut visited) {
138 errors.push(ValidationError::NullableLoop {
139 rule_name: rule_name.to_string(),
140 description: "Loop body can match empty input".to_string(),
141 });
142 }
143 check_nullable_loops(rule_name, inner, rule_map, errors);
144 }
145 Combinator::SeparatedBy {
146 item, separator, ..
147 } => {
148 let mut visited = HashSet::new();
149 if is_nullable(item, rule_map, &mut visited) {
150 errors.push(ValidationError::NullableLoop {
151 rule_name: rule_name.to_string(),
152 description: "SeparatedBy item can match empty input".to_string(),
153 });
154 }
155 visited.clear();
156 if is_nullable(separator, rule_map, &mut visited) {
157 errors.push(ValidationError::NullableLoop {
158 rule_name: rule_name.to_string(),
159 description: "SeparatedBy separator can match empty input".to_string(),
160 });
161 }
162 check_nullable_loops(rule_name, item, rule_map, errors);
163 check_nullable_loops(rule_name, separator, rule_map, errors);
164 }
165 Combinator::Sequence(items) | Combinator::Choice(items) => {
166 for item in items {
167 check_nullable_loops(rule_name, item, rule_map, errors);
168 }
169 }
170 Combinator::Optional(inner)
171 | Combinator::Skip(inner)
172 | Combinator::Capture(inner)
173 | Combinator::NotFollowedBy(inner)
174 | Combinator::FollowedBy(inner) => {
175 check_nullable_loops(rule_name, inner, rule_map, errors);
176 }
177 Combinator::Pratt(pratt) => {
178 if let Some(ref operand) = *pratt.operand {
179 check_nullable_loops(rule_name, operand, rule_map, errors);
180 }
181 }
182 Combinator::Mapped { inner, .. } | Combinator::Memoize { inner, .. } => {
183 check_nullable_loops(rule_name, inner, rule_map, errors);
184 }
185 Combinator::Rule(_)
187 | Combinator::Literal(_)
188 | Combinator::Char(_)
189 | Combinator::CharClass(_)
190 | Combinator::CharRange(_, _)
191 | Combinator::AnyChar => {}
192 }
193}
194
195fn check_left_recursion(
197 rule_name: &str,
198 comb: &Combinator,
199 rule_map: &HashMap<&str, &Combinator>,
200 errors: &mut Vec<ValidationError>,
201) {
202 let mut path = vec![rule_name.to_string()];
203 let mut visited = HashSet::new();
204 visited.insert(rule_name.to_string());
205
206 if has_left_recursion(rule_name, comb, rule_map, &mut path, &mut visited) {
207 errors.push(ValidationError::LeftRecursion {
208 rule_name: rule_name.to_string(),
209 path,
210 });
211 }
212}
213
214fn has_left_recursion(
216 target_rule: &str,
217 comb: &Combinator,
218 rule_map: &HashMap<&str, &Combinator>,
219 path: &mut Vec<String>,
220 visited: &mut HashSet<String>,
221) -> bool {
222 match comb {
223 Combinator::Rule(name) => {
224 if name == target_rule {
225 return true;
226 }
227 if visited.contains(name) {
228 return false;
229 }
230 visited.insert(name.clone());
231 path.push(name.clone());
232
233 if let Some(rule_comb) = rule_map.get(name.as_str()) {
234 let result = has_left_recursion(target_rule, rule_comb, rule_map, path, visited);
235 if !result {
236 path.pop();
237 }
238 result
239 } else {
240 path.pop();
241 false
242 }
243 }
244 Combinator::Sequence(items) => {
245 for item in items {
246 if has_left_recursion(target_rule, item, rule_map, path, visited) {
247 return true;
248 }
249 let mut null_visited = HashSet::new();
250 if !is_nullable(item, rule_map, &mut null_visited) {
251 break;
252 }
253 }
254 false
255 }
256 Combinator::Choice(items) => {
257 for item in items {
258 if has_left_recursion(target_rule, item, rule_map, path, visited) {
259 return true;
260 }
261 }
262 false
263 }
264 Combinator::Optional(inner) | Combinator::ZeroOrMore(inner) => {
265 has_left_recursion(target_rule, inner, rule_map, path, visited)
266 }
267 Combinator::OneOrMore(inner)
268 | Combinator::Skip(inner)
269 | Combinator::Capture(inner)
270 | Combinator::Mapped { inner, .. }
271 | Combinator::Memoize { inner, .. } => {
272 has_left_recursion(target_rule, inner, rule_map, path, visited)
273 }
274 Combinator::NotFollowedBy(_) | Combinator::FollowedBy(_) => false,
276 Combinator::SeparatedBy { item, .. } => {
277 has_left_recursion(target_rule, item, rule_map, path, visited)
278 }
279 Combinator::Pratt(pratt) => {
280 if let Some(ref operand) = *pratt.operand {
281 has_left_recursion(target_rule, operand, rule_map, path, visited)
282 } else {
283 false
284 }
285 }
286 Combinator::Literal(s) if !s.is_empty() => false,
288 Combinator::Literal(_) => true, Combinator::Char(_)
290 | Combinator::CharClass(_)
291 | Combinator::CharRange(_, _)
292 | Combinator::AnyChar => false,
293 }
294}
295
296#[cfg(test)]
297mod tests {
298 use super::*;
299 use crate::ir::Combinator;
300
301 #[test]
302 fn test_nullable_literal() {
303 let rule_map = HashMap::new();
304 let mut visited = HashSet::new();
305 assert!(!is_nullable(
306 &Combinator::Literal("foo".to_string()),
307 &rule_map,
308 &mut visited
309 ));
310
311 visited.clear();
312 assert!(is_nullable(
313 &Combinator::Literal("".to_string()),
314 &rule_map,
315 &mut visited
316 ));
317 }
318
319 #[test]
320 fn test_nullable_optional() {
321 let rule_map = HashMap::new();
322 let mut visited = HashSet::new();
323 let comb = Combinator::Optional(Box::new(Combinator::Literal("foo".to_string())));
324 assert!(is_nullable(&comb, &rule_map, &mut visited));
325 }
326
327 #[test]
328 fn test_nullable_zero_or_more() {
329 let rule_map = HashMap::new();
330 let mut visited = HashSet::new();
331 let comb = Combinator::ZeroOrMore(Box::new(Combinator::Literal("foo".to_string())));
332 assert!(is_nullable(&comb, &rule_map, &mut visited));
333 }
334
335 #[test]
336 fn test_nullable_char_class() {
337 let rule_map = HashMap::new();
338 let mut visited = HashSet::new();
339 assert!(!is_nullable(
340 &Combinator::CharClass(crate::ir::CharClass::Digit),
341 &rule_map,
342 &mut visited
343 ));
344 }
345}