Skip to main content

adze_ir/
validation.rs

1// Grammar validation and diagnostics for Adze
2//! Grammar validation checking for undefined, unreachable, and non-productive symbols.
3
4// This module provides comprehensive validation and diagnostic capabilities
5
6use crate::{FieldId, Grammar, Symbol, SymbolId};
7use std::collections::{HashMap, HashSet, VecDeque};
8use std::fmt;
9
10/// Grammar validation errors
11#[derive(Debug, Clone, PartialEq, Eq)]
12pub enum ValidationError {
13    /// Undefined symbol referenced
14    UndefinedSymbol {
15        /// The undefined symbol
16        symbol: SymbolId,
17        /// Where it was referenced
18        location: String,
19    },
20    /// Unreachable symbol (not reachable from start)
21    UnreachableSymbol {
22        /// The unreachable symbol
23        symbol: SymbolId,
24        /// Symbol name
25        name: String,
26    },
27    /// Non-productive symbol (can't derive terminal strings)
28    NonProductiveSymbol {
29        /// The non-productive symbol
30        symbol: SymbolId,
31        /// Symbol name
32        name: String,
33    },
34    /// Cyclic rule without base case
35    CyclicRule {
36        /// Symbols involved in the cycle
37        symbols: Vec<SymbolId>,
38    },
39    /// Duplicate rule definition
40    DuplicateRule {
41        /// Symbol with duplicate rules
42        symbol: SymbolId,
43        /// Number of existing definitions
44        existing_count: usize,
45    },
46    /// Invalid field mapping
47    InvalidField {
48        /// Invalid field ID
49        field_id: FieldId,
50        /// Symbol containing the invalid field
51        rule_symbol: SymbolId,
52    },
53    /// Empty grammar
54    EmptyGrammar,
55    /// Grammar has no explicit start rule
56    NoExplicitStartRule,
57    /// Conflicting precedence declarations
58    ConflictingPrecedence {
59        /// Symbol with conflicting precedences
60        symbol: SymbolId,
61        /// Conflicting precedence values
62        precedences: Vec<i16>,
63    },
64    /// Invalid regex pattern
65    InvalidRegex {
66        /// Token with invalid regex
67        token: SymbolId,
68        /// The invalid pattern
69        pattern: String,
70        /// Error message
71        error: String,
72    },
73    /// External token conflict
74    ExternalTokenConflict {
75        /// First conflicting token
76        token1: String,
77        /// Second conflicting token
78        token2: String,
79    },
80}
81
82impl fmt::Display for ValidationError {
83    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
84        match self {
85            ValidationError::UndefinedSymbol { symbol, location } => {
86                write!(
87                    f,
88                    "Undefined symbol {:?} referenced in {}",
89                    symbol, location
90                )
91            }
92            ValidationError::UnreachableSymbol { symbol, name } => {
93                write!(
94                    f,
95                    "Symbol '{}' ({:?}) is unreachable from start symbol",
96                    name, symbol
97                )
98            }
99            ValidationError::NonProductiveSymbol { symbol, name } => {
100                write!(
101                    f,
102                    "Symbol '{}' ({:?}) cannot derive any terminal strings",
103                    name, symbol
104                )
105            }
106            ValidationError::CyclicRule { symbols } => {
107                write!(f, "Cyclic dependency detected: {:?}", symbols)
108            }
109            ValidationError::DuplicateRule {
110                symbol,
111                existing_count,
112            } => {
113                write!(
114                    f,
115                    "Symbol {:?} has {} rule definitions (expected 1)",
116                    symbol, existing_count
117                )
118            }
119            ValidationError::InvalidField {
120                field_id,
121                rule_symbol,
122            } => {
123                write!(
124                    f,
125                    "Invalid field {:?} in rule for symbol {:?}",
126                    field_id, rule_symbol
127                )
128            }
129            ValidationError::EmptyGrammar => {
130                write!(f, "Grammar has no rules defined")
131            }
132            ValidationError::NoExplicitStartRule => {
133                write!(
134                    f,
135                    "No explicit start rule defined (first rule will be used)"
136                )
137            }
138            ValidationError::ConflictingPrecedence {
139                symbol,
140                precedences,
141            } => {
142                write!(
143                    f,
144                    "Symbol {:?} has conflicting precedences: {:?}",
145                    symbol, precedences
146                )
147            }
148            ValidationError::InvalidRegex {
149                token,
150                pattern,
151                error,
152            } => {
153                write!(
154                    f,
155                    "Invalid regex pattern for token {:?}: '{}' - {}",
156                    token, pattern, error
157                )
158            }
159            ValidationError::ExternalTokenConflict { token1, token2 } => {
160                write!(f, "External tokens '{}' and '{}' conflict", token1, token2)
161            }
162        }
163    }
164}
165
166/// Grammar validator
167pub struct GrammarValidator {
168    errors: Vec<ValidationError>,
169    warnings: Vec<ValidationWarning>,
170}
171
172/// Grammar validation warnings (non-fatal issues)
173#[derive(Debug, Clone, PartialEq, Eq)]
174pub enum ValidationWarning {
175    /// Unused token
176    UnusedToken {
177        /// The unused token
178        token: SymbolId,
179        /// Token name
180        name: String,
181    },
182    /// Duplicate token pattern
183    DuplicateTokenPattern {
184        /// Tokens with duplicate pattern
185        tokens: Vec<SymbolId>,
186        /// The duplicate pattern
187        pattern: String,
188    },
189    /// Ambiguous grammar (may need GLR)
190    AmbiguousGrammar {
191        /// Ambiguity description
192        message: String,
193    },
194    /// Missing field names
195    MissingFieldNames {
196        /// Symbol missing field names
197        rule_symbol: SymbolId,
198    },
199    /// Inefficient rule structure
200    InefficientRule {
201        /// Symbol with inefficient rule
202        symbol: SymbolId,
203        /// Optimization suggestion
204        suggestion: String,
205    },
206}
207
208impl fmt::Display for ValidationWarning {
209    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
210        match self {
211            ValidationWarning::UnusedToken { token, name } => {
212                write!(
213                    f,
214                    "Token '{}' ({:?}) is defined but never used",
215                    name, token
216                )
217            }
218            ValidationWarning::DuplicateTokenPattern { tokens, pattern } => {
219                write!(
220                    f,
221                    "Multiple tokens have the same pattern '{}': {:?}",
222                    pattern, tokens
223                )
224            }
225            ValidationWarning::AmbiguousGrammar { message } => {
226                write!(f, "Grammar ambiguity detected: {}", message)
227            }
228            ValidationWarning::MissingFieldNames { rule_symbol } => {
229                write!(f, "Rule for symbol {:?} has no field names", rule_symbol)
230            }
231            ValidationWarning::InefficientRule { symbol, suggestion } => {
232                write!(
233                    f,
234                    "Inefficient rule for symbol {:?}: {}",
235                    symbol, suggestion
236                )
237            }
238        }
239    }
240}
241
242/// Validation result
243pub struct ValidationResult {
244    /// Validation errors found
245    pub errors: Vec<ValidationError>,
246    /// Validation warnings found
247    pub warnings: Vec<ValidationWarning>,
248    /// Validation statistics
249    pub stats: ValidationStats,
250}
251
252/// Statistics gathered during validation
253#[derive(Debug, Clone, Default)]
254pub struct ValidationStats {
255    /// Total number of symbols
256    pub total_symbols: usize,
257    /// Total number of tokens
258    pub total_tokens: usize,
259    /// Total number of rules
260    pub total_rules: usize,
261    /// Number of reachable symbols
262    pub reachable_symbols: usize,
263    /// Number of productive symbols
264    pub productive_symbols: usize,
265    /// Number of external tokens
266    pub external_tokens: usize,
267    /// Maximum rule length
268    pub max_rule_length: usize,
269    /// Average rule length
270    pub avg_rule_length: f64,
271}
272
273impl Default for GrammarValidator {
274    fn default() -> Self {
275        Self::new()
276    }
277}
278
279impl GrammarValidator {
280    /// Create a new validator
281    pub fn new() -> Self {
282        Self {
283            errors: Vec::new(),
284            warnings: Vec::new(),
285        }
286    }
287
288    /// Validate a grammar and return results
289    pub fn validate(&mut self, grammar: &Grammar) -> ValidationResult {
290        self.errors.clear();
291        self.warnings.clear();
292
293        let mut stats = ValidationStats::default();
294
295        // Basic checks
296        self.check_empty_grammar(grammar);
297
298        // Symbol analysis
299        let defined_symbols = self.collect_defined_symbols(grammar);
300        let used_symbols = self.collect_used_symbols(grammar);
301
302        // Check for undefined symbols
303        self.check_undefined_symbols(&defined_symbols, &used_symbols, grammar);
304
305        // Reachability analysis
306        let reachable = self.find_reachable_symbols(grammar);
307        self.check_unreachable_symbols(&reachable, &defined_symbols, grammar);
308
309        // Productivity analysis
310        let productive = self.find_productive_symbols(grammar);
311        self.check_non_productive_symbols(&productive, &defined_symbols, grammar);
312
313        // Token validation
314        self.validate_tokens(grammar);
315
316        // Field validation
317        self.validate_fields(grammar);
318
319        // Precedence validation
320        self.validate_precedences(grammar);
321
322        // External token validation
323        self.validate_external_tokens(grammar);
324
325        // Check for cycles
326        self.check_cycles(grammar);
327
328        // Check for inefficiencies
329        self.check_inefficiencies(grammar);
330
331        // Gather statistics
332        stats.total_symbols = defined_symbols.len();
333        stats.total_tokens = grammar.tokens.len();
334        stats.total_rules = grammar.rules.values().map(|v| v.len()).sum();
335        stats.reachable_symbols = reachable.len();
336        stats.productive_symbols = productive.len();
337        stats.external_tokens = grammar.externals.len();
338
339        if !grammar.rules.is_empty() {
340            let all_rules: Vec<_> = grammar.all_rules().collect();
341            let total_length: usize = all_rules.iter().map(|r| r.rhs.len()).sum();
342            stats.max_rule_length = all_rules.iter().map(|r| r.rhs.len()).max().unwrap_or(0);
343            stats.avg_rule_length = total_length as f64 / all_rules.len() as f64;
344        }
345
346        ValidationResult {
347            errors: self.errors.clone(),
348            warnings: self.warnings.clone(),
349            stats,
350        }
351    }
352
353    fn check_empty_grammar(&mut self, grammar: &Grammar) {
354        if grammar.rules.is_empty() {
355            self.errors.push(ValidationError::EmptyGrammar);
356        }
357    }
358
359    fn collect_defined_symbols(&self, grammar: &Grammar) -> HashSet<SymbolId> {
360        let mut defined = HashSet::new();
361
362        // All tokens are defined
363        defined.extend(grammar.tokens.keys());
364
365        // All rule LHS are defined
366        defined.extend(grammar.rules.keys());
367
368        // External tokens are defined
369        for external in &grammar.externals {
370            defined.insert(external.symbol_id);
371        }
372
373        defined
374    }
375
376    /// Helper to collect used symbols from a symbol recursively
377    fn collect_used_in_symbol(symbol: &Symbol, used: &mut HashSet<SymbolId>) {
378        match symbol {
379            Symbol::Terminal(id) | Symbol::NonTerminal(id) => {
380                used.insert(*id);
381            }
382            Symbol::External(id) => {
383                used.insert(SymbolId(id.0));
384            }
385            Symbol::Optional(inner) | Symbol::Repeat(inner) | Symbol::RepeatOne(inner) => {
386                Self::collect_used_in_symbol(inner, used);
387            }
388            Symbol::Choice(choices) => {
389                for s in choices {
390                    Self::collect_used_in_symbol(s, used);
391                }
392            }
393            Symbol::Sequence(seq) => {
394                for s in seq {
395                    Self::collect_used_in_symbol(s, used);
396                }
397            }
398            Symbol::Epsilon => {}
399        }
400    }
401
402    fn collect_used_symbols(&self, grammar: &Grammar) -> HashSet<SymbolId> {
403        let mut used = HashSet::new();
404
405        // First rule's LHS is implicitly the start symbol
406        if let Some(start_symbol) = grammar.start_symbol() {
407            used.insert(start_symbol);
408        }
409
410        // Symbols in rule RHS are used
411        for rule in grammar.all_rules() {
412            for symbol in &rule.rhs {
413                Self::collect_used_in_symbol(symbol, &mut used);
414            }
415        }
416
417        used
418    }
419
420    fn check_undefined_symbols(
421        &mut self,
422        defined: &HashSet<SymbolId>,
423        used: &HashSet<SymbolId>,
424        grammar: &Grammar,
425    ) {
426        for symbol in used {
427            if !defined.contains(symbol) {
428                // Find where it's used
429                let mut location = String::from("unknown");
430                for (rule_sym, rules) in &grammar.rules {
431                    for rule in rules {
432                        for rhs_sym in &rule.rhs {
433                            match rhs_sym {
434                                Symbol::Terminal(id) | Symbol::NonTerminal(id) if id == symbol => {
435                                    location = format!("rule for {:?}", rule_sym);
436                                    break;
437                                }
438                                _ => {}
439                            }
440                        }
441                    }
442                }
443
444                self.errors.push(ValidationError::UndefinedSymbol {
445                    symbol: *symbol,
446                    location,
447                });
448            }
449        }
450    }
451
452    /// Helper to add reachable symbols from a symbol
453    fn add_reachable_from_symbol(
454        symbol: &Symbol,
455        reachable: &mut HashSet<SymbolId>,
456        queue: &mut VecDeque<SymbolId>,
457    ) {
458        match symbol {
459            Symbol::Terminal(id) | Symbol::NonTerminal(id) => {
460                if reachable.insert(*id) {
461                    queue.push_back(*id);
462                }
463            }
464            Symbol::External(ext_id) => {
465                let id = SymbolId(ext_id.0);
466                if reachable.insert(id) {
467                    queue.push_back(id);
468                }
469            }
470            Symbol::Optional(inner) | Symbol::Repeat(inner) | Symbol::RepeatOne(inner) => {
471                Self::add_reachable_from_symbol(inner, reachable, queue);
472            }
473            Symbol::Choice(choices) => {
474                for s in choices {
475                    Self::add_reachable_from_symbol(s, reachable, queue);
476                }
477            }
478            Symbol::Sequence(seq) => {
479                for s in seq {
480                    Self::add_reachable_from_symbol(s, reachable, queue);
481                }
482            }
483            Symbol::Epsilon => {}
484        }
485    }
486
487    fn find_reachable_symbols(&self, grammar: &Grammar) -> HashSet<SymbolId> {
488        let mut reachable = HashSet::new();
489        let mut queue = VecDeque::new();
490
491        // Start from the first rule (implicit start symbol)
492        if let Some(start) = grammar.start_symbol() {
493            queue.push_back(start);
494            reachable.insert(start);
495        }
496
497        // BFS to find all reachable symbols
498        while let Some(symbol) = queue.pop_front() {
499            if let Some(rules) = grammar.rules.get(&symbol) {
500                for rule in rules {
501                    for rhs_symbol in &rule.rhs {
502                        Self::add_reachable_from_symbol(rhs_symbol, &mut reachable, &mut queue);
503                    }
504                }
505            }
506        }
507
508        reachable
509    }
510
511    fn check_unreachable_symbols(
512        &mut self,
513        reachable: &HashSet<SymbolId>,
514        defined: &HashSet<SymbolId>,
515        grammar: &Grammar,
516    ) {
517        let start_symbol = grammar.start_symbol();
518
519        for symbol in defined {
520            if !reachable.contains(symbol) && Some(*symbol) != start_symbol {
521                let name = self.get_symbol_name(*symbol, grammar);
522                self.warnings.push(ValidationWarning::UnusedToken {
523                    token: *symbol,
524                    name,
525                });
526            }
527        }
528    }
529
530    /// Helper to check if a symbol is productive
531    fn is_symbol_productive(symbol: &Symbol, productive: &HashSet<SymbolId>) -> bool {
532        match symbol {
533            Symbol::Terminal(id) | Symbol::NonTerminal(id) => productive.contains(id),
534            Symbol::External(ext_id) => productive.contains(&SymbolId(ext_id.0)),
535            Symbol::Epsilon => true,     // Epsilon is always productive
536            Symbol::Optional(_) => true, // Optional is always productive (can be empty)
537            Symbol::Repeat(_) => true,   // Repeat is always productive (can be empty)
538            Symbol::RepeatOne(inner) => Self::is_symbol_productive(inner, productive),
539            Symbol::Choice(choices) => choices
540                .iter()
541                .any(|s| Self::is_symbol_productive(s, productive)),
542            Symbol::Sequence(seq) => seq
543                .iter()
544                .all(|s| Self::is_symbol_productive(s, productive)),
545        }
546    }
547
548    fn find_productive_symbols(&self, grammar: &Grammar) -> HashSet<SymbolId> {
549        let mut productive = HashSet::new();
550        let mut changed = true;
551
552        // All tokens are productive
553        productive.extend(grammar.tokens.keys());
554
555        // External tokens are productive
556        for external in &grammar.externals {
557            productive.insert(external.symbol_id);
558        }
559
560        // Fixed-point iteration to find productive non-terminals
561        while changed {
562            changed = false;
563
564            for (symbol, rules) in &grammar.rules {
565                if !productive.contains(symbol) {
566                    // Check if any rule for this symbol is productive
567                    let any_productive = rules.iter().any(|rule| {
568                        // Check if all RHS symbols are productive
569                        rule.rhs
570                            .iter()
571                            .all(|rhs_sym| Self::is_symbol_productive(rhs_sym, &productive))
572                    });
573
574                    if any_productive {
575                        productive.insert(*symbol);
576                        changed = true;
577                    }
578                }
579            }
580        }
581
582        productive
583    }
584
585    fn check_non_productive_symbols(
586        &mut self,
587        productive: &HashSet<SymbolId>,
588        defined: &HashSet<SymbolId>,
589        grammar: &Grammar,
590    ) {
591        for symbol in defined {
592            if !productive.contains(symbol) {
593                let name = self.get_symbol_name(*symbol, grammar);
594                self.errors.push(ValidationError::NonProductiveSymbol {
595                    symbol: *symbol,
596                    name,
597                });
598            }
599        }
600    }
601
602    fn validate_tokens(&mut self, grammar: &Grammar) {
603        let mut pattern_map: HashMap<String, Vec<SymbolId>> = HashMap::new();
604
605        for (symbol, token) in &grammar.tokens {
606            // Check regex validity
607            if let crate::TokenPattern::Regex(pattern) = &token.pattern {
608                // In a real implementation, we'd compile the regex
609                // For now, just check for basic issues
610                if pattern.is_empty() {
611                    self.errors.push(ValidationError::InvalidRegex {
612                        token: *symbol,
613                        pattern: pattern.clone(),
614                        error: "Empty regex pattern".to_string(),
615                    });
616                }
617            }
618
619            // Check for duplicate patterns
620            let pattern_str = match &token.pattern {
621                crate::TokenPattern::String(s) => s.clone(),
622                crate::TokenPattern::Regex(r) => r.clone(),
623            };
624
625            pattern_map
626                .entry(pattern_str.clone())
627                .or_default()
628                .push(*symbol);
629        }
630
631        // Report duplicate patterns
632        for (pattern, symbols) in pattern_map {
633            if symbols.len() > 1 {
634                self.warnings
635                    .push(ValidationWarning::DuplicateTokenPattern {
636                        tokens: symbols,
637                        pattern,
638                    });
639            }
640        }
641    }
642
643    fn validate_fields(&mut self, grammar: &Grammar) {
644        for (symbol, rules) in &grammar.rules {
645            for rule in rules {
646                // Check that field indices are valid
647                for (field_id, index) in &rule.fields {
648                    if *index >= rule.rhs.len() {
649                        self.errors.push(ValidationError::InvalidField {
650                            field_id: *field_id,
651                            rule_symbol: *symbol,
652                        });
653                    }
654                }
655
656                // Warn about missing field names
657                if rule.fields.is_empty() && rule.rhs.len() > 1 {
658                    self.warnings.push(ValidationWarning::MissingFieldNames {
659                        rule_symbol: *symbol,
660                    });
661                }
662            }
663        }
664    }
665
666    fn validate_precedences(&mut self, grammar: &Grammar) {
667        let mut symbol_precedences: HashMap<SymbolId, Vec<i16>> = HashMap::new();
668
669        // Collect precedences from declarations
670        for prec in &grammar.precedences {
671            for symbol in &prec.symbols {
672                symbol_precedences
673                    .entry(*symbol)
674                    .or_default()
675                    .push(prec.level);
676            }
677        }
678
679        // Check for conflicts
680        for (symbol, precedences) in symbol_precedences {
681            let unique_precs: HashSet<_> = precedences.iter().cloned().collect();
682            if unique_precs.len() > 1 {
683                self.errors.push(ValidationError::ConflictingPrecedence {
684                    symbol,
685                    precedences: unique_precs.into_iter().collect(),
686                });
687            }
688        }
689    }
690
691    fn validate_external_tokens(&mut self, grammar: &Grammar) {
692        let mut names = HashSet::new();
693
694        for external in &grammar.externals {
695            if !names.insert(&external.name) {
696                self.errors.push(ValidationError::ExternalTokenConflict {
697                    token1: external.name.clone(),
698                    token2: external.name.clone(),
699                });
700            }
701        }
702    }
703
704    fn check_cycles(&mut self, grammar: &Grammar) {
705        // Simple cycle detection using DFS
706        let mut visited = HashSet::new();
707        let mut rec_stack = HashSet::new();
708        let mut path = Vec::new();
709
710        for symbol in grammar.rules.keys() {
711            if !visited.contains(symbol)
712                && self.has_cycle(*symbol, grammar, &mut visited, &mut rec_stack, &mut path)
713            {
714                self.errors.push(ValidationError::CyclicRule {
715                    symbols: path.clone(),
716                });
717            }
718        }
719    }
720
721    #[allow(clippy::only_used_in_recursion)]
722    fn has_cycle(
723        &self,
724        symbol: SymbolId,
725        grammar: &Grammar,
726        visited: &mut HashSet<SymbolId>,
727        rec_stack: &mut HashSet<SymbolId>,
728        path: &mut Vec<SymbolId>,
729    ) -> bool {
730        visited.insert(symbol);
731        rec_stack.insert(symbol);
732        path.push(symbol);
733
734        if let Some(rules) = grammar.rules.get(&symbol) {
735            for rule in rules {
736                for rhs_symbol in &rule.rhs {
737                    if let Symbol::NonTerminal(id) = rhs_symbol {
738                        if !visited.contains(id) {
739                            if self.has_cycle(*id, grammar, visited, rec_stack, path) {
740                                return true;
741                            }
742                        } else if rec_stack.contains(id) {
743                            // Found a cycle
744                            return true;
745                        }
746                    }
747                }
748            }
749        }
750
751        path.pop();
752        rec_stack.remove(&symbol);
753        false
754    }
755
756    fn check_inefficiencies(&mut self, grammar: &Grammar) {
757        for (symbol, rules) in &grammar.rules {
758            for rule in rules {
759                // Check for trivial rules (A -> B)
760                if rule.rhs.len() == 1
761                    && let Symbol::NonTerminal(_) = &rule.rhs[0]
762                {
763                    self.warnings.push(ValidationWarning::InefficientRule {
764                        symbol: *symbol,
765                        suggestion: "Consider inlining trivial rules".to_string(),
766                    });
767                }
768
769                // Check for very long rules
770                if rule.rhs.len() > 10 {
771                    self.warnings.push(ValidationWarning::InefficientRule {
772                        symbol: *symbol,
773                        suggestion: format!(
774                            "Rule has {} symbols, consider breaking it down",
775                            rule.rhs.len()
776                        ),
777                    });
778                }
779            }
780        }
781    }
782
783    fn get_symbol_name(&self, symbol: SymbolId, grammar: &Grammar) -> String {
784        if let Some(token) = grammar.tokens.get(&symbol) {
785            return token.name.clone();
786        }
787
788        if let Some(_rule) = grammar.rules.get(&symbol) {
789            return format!("rule_{}", symbol.0);
790        }
791
792        for external in &grammar.externals {
793            if external.symbol_id == symbol {
794                return external.name.clone();
795            }
796        }
797
798        format!("symbol_{}", symbol.0)
799    }
800}
801
802#[cfg(test)]
803mod tests {
804    use super::*;
805    use crate::{
806        FieldId, Grammar, Precedence, ProductionId, Rule, Symbol, SymbolId, Token, TokenPattern,
807    };
808
809    #[test]
810    fn test_empty_grammar_validation() {
811        let grammar = Grammar::new("test".to_string());
812        let mut validator = GrammarValidator::new();
813        let result = validator.validate(&grammar);
814
815        assert!(
816            result
817                .errors
818                .iter()
819                .any(|e| matches!(e, ValidationError::EmptyGrammar))
820        );
821    }
822
823    #[test]
824    fn test_undefined_symbol() {
825        let mut grammar = Grammar::new("test".to_string());
826        let expr = SymbolId(1);
827        let undefined = SymbolId(99);
828
829        grammar.add_rule(Rule {
830            lhs: expr,
831            rhs: vec![Symbol::NonTerminal(undefined)],
832            precedence: None,
833            associativity: None,
834            fields: vec![],
835            production_id: ProductionId(0),
836        });
837
838        let mut validator = GrammarValidator::new();
839        let result = validator.validate(&grammar);
840
841        assert!(result.errors.iter().any(|e| {
842            matches!(e, ValidationError::UndefinedSymbol { symbol, .. } if *symbol == undefined)
843        }));
844    }
845
846    #[test]
847    fn test_non_productive_symbol() {
848        let mut grammar = Grammar::new("test".to_string());
849        let a = SymbolId(1);
850        let b = SymbolId(2);
851
852        // A -> B
853        grammar.add_rule(Rule {
854            lhs: a,
855            rhs: vec![Symbol::NonTerminal(b)],
856            precedence: None,
857            associativity: None,
858            fields: vec![],
859            production_id: ProductionId(0),
860        });
861
862        // B -> A (circular, non-productive)
863        grammar.add_rule(Rule {
864            lhs: b,
865            rhs: vec![Symbol::NonTerminal(a)],
866            precedence: None,
867            associativity: None,
868            fields: vec![],
869            production_id: ProductionId(1),
870        });
871
872        let mut validator = GrammarValidator::new();
873        let result = validator.validate(&grammar);
874
875        assert!(
876            result
877                .errors
878                .iter()
879                .any(|e| { matches!(e, ValidationError::NonProductiveSymbol { .. }) })
880        );
881    }
882
883    #[test]
884    fn test_valid_grammar() {
885        let mut grammar = Grammar::new("test".to_string());
886        let expr = SymbolId(1);
887        let num = SymbolId(2);
888
889        // Add token
890        grammar.tokens.insert(
891            num,
892            Token {
893                name: "number".to_string(),
894                pattern: TokenPattern::Regex(r"\d+".to_string()),
895                fragile: false,
896            },
897        );
898
899        // expr -> num
900        grammar.add_rule(Rule {
901            lhs: expr,
902            rhs: vec![Symbol::Terminal(num)],
903            precedence: None,
904            associativity: None,
905            fields: vec![],
906            production_id: ProductionId(0),
907        });
908
909        // No explicit start symbol field, first rule is implicit start
910
911        let mut validator = GrammarValidator::new();
912        let result = validator.validate(&grammar);
913
914        assert!(result.errors.is_empty());
915        assert_eq!(result.stats.total_symbols, 2);
916        assert_eq!(result.stats.productive_symbols, 2);
917    }
918
919    #[test]
920    fn test_duplicate_token_patterns() {
921        let mut grammar = Grammar::new("test".to_string());
922
923        // Add two tokens with the same pattern
924        grammar.tokens.insert(
925            SymbolId(1),
926            Token {
927                name: "plus1".to_string(),
928                pattern: TokenPattern::String("+".to_string()),
929                fragile: false,
930            },
931        );
932
933        grammar.tokens.insert(
934            SymbolId(2),
935            Token {
936                name: "plus2".to_string(),
937                pattern: TokenPattern::String("+".to_string()),
938                fragile: false,
939            },
940        );
941
942        let mut validator = GrammarValidator::new();
943        let result = validator.validate(&grammar);
944
945        assert!(result.warnings.iter().any(|w| {
946            matches!(w, ValidationWarning::DuplicateTokenPattern { pattern, .. } if pattern == "+")
947        }));
948    }
949
950    #[test]
951    fn test_invalid_field_index() {
952        let mut grammar = Grammar::new("test".to_string());
953        let expr = SymbolId(1);
954        let num = SymbolId(2);
955
956        grammar.tokens.insert(
957            num,
958            Token {
959                name: "number".to_string(),
960                pattern: TokenPattern::Regex(r"\d+".to_string()),
961                fragile: false,
962            },
963        );
964
965        // Rule with invalid field index
966        grammar.add_rule(Rule {
967            lhs: expr,
968            rhs: vec![Symbol::Terminal(num)],
969            precedence: None,
970            associativity: None,
971            fields: vec![(FieldId(0), 5)], // Index 5 is out of bounds
972            production_id: ProductionId(0),
973        });
974
975        let mut validator = GrammarValidator::new();
976        let result = validator.validate(&grammar);
977
978        assert!(
979            result
980                .errors
981                .iter()
982                .any(|e| { matches!(e, ValidationError::InvalidField { .. }) })
983        );
984    }
985
986    #[test]
987    fn test_cyclic_rules() {
988        let mut grammar = Grammar::new("test".to_string());
989        let a = SymbolId(1);
990        let b = SymbolId(2);
991        let c = SymbolId(3);
992
993        // Create a cycle: A -> B -> C -> A
994        grammar.add_rule(Rule {
995            lhs: a,
996            rhs: vec![Symbol::NonTerminal(b)],
997            precedence: None,
998            associativity: None,
999            fields: vec![],
1000            production_id: ProductionId(0),
1001        });
1002
1003        grammar.add_rule(Rule {
1004            lhs: b,
1005            rhs: vec![Symbol::NonTerminal(c)],
1006            precedence: None,
1007            associativity: None,
1008            fields: vec![],
1009            production_id: ProductionId(1),
1010        });
1011
1012        grammar.add_rule(Rule {
1013            lhs: c,
1014            rhs: vec![Symbol::NonTerminal(a)],
1015            precedence: None,
1016            associativity: None,
1017            fields: vec![],
1018            production_id: ProductionId(2),
1019        });
1020
1021        let mut validator = GrammarValidator::new();
1022        let result = validator.validate(&grammar);
1023
1024        assert!(
1025            result
1026                .errors
1027                .iter()
1028                .any(|e| { matches!(e, ValidationError::CyclicRule { .. }) })
1029        );
1030    }
1031
1032    #[test]
1033    fn test_conflicting_precedence() {
1034        let mut grammar = Grammar::new("test".to_string());
1035        let plus = SymbolId(1);
1036
1037        // Add conflicting precedence declarations
1038        grammar.precedences.push(Precedence {
1039            level: 1,
1040            associativity: crate::Associativity::Left,
1041            symbols: vec![plus],
1042        });
1043
1044        grammar.precedences.push(Precedence {
1045            level: 2,
1046            associativity: crate::Associativity::Right,
1047            symbols: vec![plus],
1048        });
1049
1050        let mut validator = GrammarValidator::new();
1051        let result = validator.validate(&grammar);
1052
1053        assert!(result.errors.iter().any(|e| {
1054            matches!(e, ValidationError::ConflictingPrecedence { symbol, .. } if *symbol == plus)
1055        }));
1056    }
1057}