Skip to main content

trampoline_parser/
validation.rs

1//! Grammar validation to detect problematic patterns
2//!
3//! This module provides compile-time validation to detect:
4//! - Nullable loops (loops where inner can match empty input - causes infinite loops)
5//! - Direct left recursion (rules that call themselves without consuming input)
6
7use crate::ir::{Combinator, RuleDef};
8use std::collections::{HashMap, HashSet};
9
10/// Validation errors for grammar
11#[derive(Debug, Clone)]
12pub enum ValidationError {
13    /// A loop combinator (ZeroOrMore, OneOrMore, SeparatedBy) has a nullable inner
14    NullableLoop {
15        rule_name: String,
16        description: String,
17    },
18    /// A rule has direct left recursion
19    LeftRecursion {
20        rule_name: String,
21        path: Vec<String>,
22    },
23}
24
25impl std::fmt::Display for ValidationError {
26    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
27        match self {
28            ValidationError::NullableLoop {
29                rule_name,
30                description,
31            } => {
32                write!(f, "Nullable loop in rule '{}': {}", rule_name, description)
33            }
34            ValidationError::LeftRecursion { rule_name, path } => {
35                write!(
36                    f,
37                    "Left recursion in rule '{}': {}",
38                    rule_name,
39                    path.join(" -> ")
40                )
41            }
42        }
43    }
44}
45
46/// Validate a grammar and return any errors found
47pub fn validate_grammar(rules: &[RuleDef]) -> Vec<ValidationError> {
48    let mut errors = Vec::new();
49
50    // Build a map of rule names to their combinators for lookup
51    let rule_map: HashMap<&str, &Combinator> = rules
52        .iter()
53        .map(|r| (r.name.as_str(), &r.combinator))
54        .collect();
55
56    // Check each rule
57    for rule in rules {
58        // Check for nullable loops
59        check_nullable_loops(&rule.name, &rule.combinator, &rule_map, &mut errors);
60
61        // Check for left recursion
62        check_left_recursion(&rule.name, &rule.combinator, &rule_map, &mut errors);
63    }
64
65    errors
66}
67
68/// Check if a combinator can match empty input (is nullable)
69pub fn is_nullable(
70    comb: &Combinator,
71    rule_map: &HashMap<&str, &Combinator>,
72    visited: &mut HashSet<String>,
73) -> bool {
74    match comb {
75        // Character-level primitives - never nullable (always consume input)
76        Combinator::Literal(s) => s.is_empty(), // Empty string is nullable
77        Combinator::Char(_) => false,
78        Combinator::CharClass(_) => false,
79        Combinator::CharRange(_, _) => false,
80        Combinator::AnyChar => false,
81
82        // Lookahead - doesn't consume input
83        Combinator::NotFollowedBy(_) => true,
84        Combinator::FollowedBy(_) => true,
85
86        // Capture - nullable if inner is nullable
87        Combinator::Capture(inner) => is_nullable(inner, rule_map, visited),
88
89        // Rule reference
90        Combinator::Rule(name) => {
91            if visited.contains(name) {
92                return false; // Assume not nullable for recursive rules
93            }
94            visited.insert(name.clone());
95            if let Some(rule_comb) = rule_map.get(name.as_str()) {
96                is_nullable(rule_comb, rule_map, visited)
97            } else {
98                false
99            }
100        }
101
102        // Combinators
103        Combinator::Sequence(items) => items
104            .iter()
105            .all(|item| is_nullable(item, rule_map, visited)),
106        Combinator::Choice(items) => items
107            .iter()
108            .any(|item| is_nullable(item, rule_map, visited)),
109        Combinator::ZeroOrMore(_) => true,
110        Combinator::OneOrMore(inner) => is_nullable(inner, rule_map, visited),
111        Combinator::Optional(_) => true,
112        Combinator::Skip(inner) => is_nullable(inner, rule_map, visited),
113        Combinator::SeparatedBy { .. } => false, // Requires at least one item
114        Combinator::Pratt(pratt) => {
115            if let Some(ref operand) = *pratt.operand {
116                is_nullable(operand, rule_map, visited)
117            } else {
118                false
119            }
120        }
121        Combinator::Mapped { inner, .. } | Combinator::Memoize { inner, .. } => {
122            is_nullable(inner, rule_map, visited)
123        }
124    }
125}
126
127/// Check for nullable loops in a combinator
128fn check_nullable_loops(
129    rule_name: &str,
130    comb: &Combinator,
131    rule_map: &HashMap<&str, &Combinator>,
132    errors: &mut Vec<ValidationError>,
133) {
134    match comb {
135        Combinator::ZeroOrMore(inner) | Combinator::OneOrMore(inner) => {
136            let mut visited = HashSet::new();
137            if is_nullable(inner, rule_map, &mut visited) {
138                errors.push(ValidationError::NullableLoop {
139                    rule_name: rule_name.to_string(),
140                    description: "Loop body can match empty input".to_string(),
141                });
142            }
143            check_nullable_loops(rule_name, inner, rule_map, errors);
144        }
145        Combinator::SeparatedBy {
146            item, separator, ..
147        } => {
148            let mut visited = HashSet::new();
149            if is_nullable(item, rule_map, &mut visited) {
150                errors.push(ValidationError::NullableLoop {
151                    rule_name: rule_name.to_string(),
152                    description: "SeparatedBy item can match empty input".to_string(),
153                });
154            }
155            visited.clear();
156            if is_nullable(separator, rule_map, &mut visited) {
157                errors.push(ValidationError::NullableLoop {
158                    rule_name: rule_name.to_string(),
159                    description: "SeparatedBy separator can match empty input".to_string(),
160                });
161            }
162            check_nullable_loops(rule_name, item, rule_map, errors);
163            check_nullable_loops(rule_name, separator, rule_map, errors);
164        }
165        Combinator::Sequence(items) | Combinator::Choice(items) => {
166            for item in items {
167                check_nullable_loops(rule_name, item, rule_map, errors);
168            }
169        }
170        Combinator::Optional(inner)
171        | Combinator::Skip(inner)
172        | Combinator::Capture(inner)
173        | Combinator::NotFollowedBy(inner)
174        | Combinator::FollowedBy(inner) => {
175            check_nullable_loops(rule_name, inner, rule_map, errors);
176        }
177        Combinator::Pratt(pratt) => {
178            if let Some(ref operand) = *pratt.operand {
179                check_nullable_loops(rule_name, operand, rule_map, errors);
180            }
181        }
182        Combinator::Mapped { inner, .. } | Combinator::Memoize { inner, .. } => {
183            check_nullable_loops(rule_name, inner, rule_map, errors);
184        }
185        // Leaf combinators - no nested loops
186        Combinator::Rule(_)
187        | Combinator::Literal(_)
188        | Combinator::Char(_)
189        | Combinator::CharClass(_)
190        | Combinator::CharRange(_, _)
191        | Combinator::AnyChar => {}
192    }
193}
194
195/// Check for left recursion starting from a combinator
196fn check_left_recursion(
197    rule_name: &str,
198    comb: &Combinator,
199    rule_map: &HashMap<&str, &Combinator>,
200    errors: &mut Vec<ValidationError>,
201) {
202    let mut path = vec![rule_name.to_string()];
203    let mut visited = HashSet::new();
204    visited.insert(rule_name.to_string());
205
206    if has_left_recursion(rule_name, comb, rule_map, &mut path, &mut visited) {
207        errors.push(ValidationError::LeftRecursion {
208            rule_name: rule_name.to_string(),
209            path,
210        });
211    }
212}
213
214/// Check if a combinator can reach the given rule name without consuming input
215fn has_left_recursion(
216    target_rule: &str,
217    comb: &Combinator,
218    rule_map: &HashMap<&str, &Combinator>,
219    path: &mut Vec<String>,
220    visited: &mut HashSet<String>,
221) -> bool {
222    match comb {
223        Combinator::Rule(name) => {
224            if name == target_rule {
225                return true;
226            }
227            if visited.contains(name) {
228                return false;
229            }
230            visited.insert(name.clone());
231            path.push(name.clone());
232
233            if let Some(rule_comb) = rule_map.get(name.as_str()) {
234                let result = has_left_recursion(target_rule, rule_comb, rule_map, path, visited);
235                if !result {
236                    path.pop();
237                }
238                result
239            } else {
240                path.pop();
241                false
242            }
243        }
244        Combinator::Sequence(items) => {
245            for item in items {
246                if has_left_recursion(target_rule, item, rule_map, path, visited) {
247                    return true;
248                }
249                let mut null_visited = HashSet::new();
250                if !is_nullable(item, rule_map, &mut null_visited) {
251                    break;
252                }
253            }
254            false
255        }
256        Combinator::Choice(items) => {
257            for item in items {
258                if has_left_recursion(target_rule, item, rule_map, path, visited) {
259                    return true;
260                }
261            }
262            false
263        }
264        Combinator::Optional(inner) | Combinator::ZeroOrMore(inner) => {
265            has_left_recursion(target_rule, inner, rule_map, path, visited)
266        }
267        Combinator::OneOrMore(inner)
268        | Combinator::Skip(inner)
269        | Combinator::Capture(inner)
270        | Combinator::Mapped { inner, .. }
271        | Combinator::Memoize { inner, .. } => {
272            has_left_recursion(target_rule, inner, rule_map, path, visited)
273        }
274        // Lookahead doesn't consume input but can't cause left recursion by itself
275        Combinator::NotFollowedBy(_) | Combinator::FollowedBy(_) => false,
276        Combinator::SeparatedBy { item, .. } => {
277            has_left_recursion(target_rule, item, rule_map, path, visited)
278        }
279        Combinator::Pratt(pratt) => {
280            if let Some(ref operand) = *pratt.operand {
281                has_left_recursion(target_rule, operand, rule_map, path, visited)
282            } else {
283                false
284            }
285        }
286        // Leaf combinators that consume input - stop here
287        Combinator::Literal(s) if !s.is_empty() => false,
288        Combinator::Literal(_) => true, // Empty literal is nullable
289        Combinator::Char(_)
290        | Combinator::CharClass(_)
291        | Combinator::CharRange(_, _)
292        | Combinator::AnyChar => false,
293    }
294}
295
296#[cfg(test)]
297mod tests {
298    use super::*;
299    use crate::ir::Combinator;
300
301    #[test]
302    fn test_nullable_literal() {
303        let rule_map = HashMap::new();
304        let mut visited = HashSet::new();
305        assert!(!is_nullable(
306            &Combinator::Literal("foo".to_string()),
307            &rule_map,
308            &mut visited
309        ));
310
311        visited.clear();
312        assert!(is_nullable(
313            &Combinator::Literal("".to_string()),
314            &rule_map,
315            &mut visited
316        ));
317    }
318
319    #[test]
320    fn test_nullable_optional() {
321        let rule_map = HashMap::new();
322        let mut visited = HashSet::new();
323        let comb = Combinator::Optional(Box::new(Combinator::Literal("foo".to_string())));
324        assert!(is_nullable(&comb, &rule_map, &mut visited));
325    }
326
327    #[test]
328    fn test_nullable_zero_or_more() {
329        let rule_map = HashMap::new();
330        let mut visited = HashSet::new();
331        let comb = Combinator::ZeroOrMore(Box::new(Combinator::Literal("foo".to_string())));
332        assert!(is_nullable(&comb, &rule_map, &mut visited));
333    }
334
335    #[test]
336    fn test_nullable_char_class() {
337        let rule_map = HashMap::new();
338        let mut visited = HashSet::new();
339        assert!(!is_nullable(
340            &Combinator::CharClass(crate::ir::CharClass::Digit),
341            &rule_map,
342            &mut visited
343        ));
344    }
345}