Skip to main content

adze_ir/
lib.rs

1// IR crate should be safe - no unsafe needed for grammar representation
2#![forbid(unsafe_code)]
3#![cfg_attr(feature = "strict_docs", deny(missing_docs))]
4#![cfg_attr(not(feature = "strict_docs"), warn(missing_docs))]
5
6//! Grammar Intermediate Representation for Adze
7//! This module provides GLR-aware data structures for representing grammars
8
9use indexmap::IndexMap;
10use serde::{Deserialize, Serialize};
11use std::fmt;
12
13/// Error types and Result alias for IR operations.
14pub mod error;
15/// Error types for grammar IR operations.
16pub use error::{IrError, Result as IrResult};
17
18/// Grammar optimization utilities
19pub mod optimizer;
20/// Grammar optimization utilities and statistics.
21pub use optimizer::{GrammarOptimizer, OptimizationStats, optimize_grammar};
22
23/// Grammar validation utilities
24pub mod validation;
25/// Grammar validation types and results.
26pub use validation::{GrammarValidator, ValidationError, ValidationResult, ValidationWarning};
27
28/// Debug macros for development
29pub mod debug_macros;
30/// Symbol registry for managing grammar symbols
31pub mod symbol_registry;
32/// Symbol registry for deterministic ID assignment.
33pub use symbol_registry::{SymbolInfo, SymbolRegistry};
34/// Builder API for programmatically constructing grammars
35pub mod builder;
36
/// Core grammar representation supporting all Tree-sitter features including GLR.
///
/// Keyed collections are `IndexMap`s; `start_symbol` falls back to the first key of
/// `rules`, so the order in which rules are added is significant.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
pub struct Grammar {
    /// Grammar name
    pub name: String,
    /// Production rules grouped by left-hand side symbol (`add_rule` appends to the
    /// entry of the rule's `lhs`)
    pub rules: IndexMap<SymbolId, Vec<Rule>>,
    /// Token definitions keyed by the token's symbol ID
    pub tokens: IndexMap<SymbolId, Token>,
    /// Precedence declarations
    pub precedences: Vec<Precedence>,
    /// Conflict resolution declarations
    pub conflicts: Vec<ConflictDeclaration>,
    /// External scanner tokens
    pub externals: Vec<ExternalToken>,
    /// Extra tokens (e.g., whitespace, comments)
    pub extras: Vec<SymbolId>,
    /// Field names; `validate` rejects the grammar unless the values appear in
    /// lexicographic order
    pub fields: IndexMap<FieldId, String>,
    /// Supertype symbols
    pub supertypes: Vec<SymbolId>,
    /// Rules to inline during generation
    pub inline_rules: Vec<SymbolId>,
    /// Alias sequences for productions
    pub alias_sequences: IndexMap<ProductionId, AliasSequence>,
    /// Maps rule IDs to production IDs
    pub production_ids: IndexMap<RuleId, ProductionId>,
    /// Maximum alias sequence length
    pub max_alias_sequence_length: usize,
    /// Maps symbol IDs to rule names (searched by `find_symbol_by_name`)
    pub rule_names: IndexMap<SymbolId, String>,
    /// Centralized symbol registry; `None` until `get_or_build_registry` fills it in
    pub symbol_registry: Option<SymbolRegistry>,
}
71
72impl Grammar {
73    /// Add a rule to the grammar
74    pub fn add_rule(&mut self, rule: Rule) {
75        self.rules.entry(rule.lhs).or_default().push(rule);
76    }
77
78    /// Get all rules for a given LHS symbol
79    pub fn get_rules_for_symbol(&self, symbol: SymbolId) -> Option<&Vec<Rule>> {
80        self.rules.get(&symbol)
81    }
82
83    /// Iterate over all rules in the grammar
84    pub fn all_rules(&self) -> impl Iterator<Item = &Rule> {
85        self.rules.values().flat_map(|rules| rules.iter())
86    }
87
88    /// Get the start symbol (LHS of the first rule)
89    pub fn start_symbol(&self) -> Option<SymbolId> {
90        // For Tree-sitter compatibility, look for "source_file" symbol
91        if let Some(source_file_id) = self.find_symbol_by_name("source_file")
92            && self.rules.contains_key(&source_file_id)
93        {
94            return Some(source_file_id);
95        }
96
97        // In adze, source_file is often just a reference to the actual language type
98        // So let's look for the language type that's marked with #[adze::language]
99        // This is typically the first non-terminal that has rules
100
101        // Try common patterns first
102        for name in &["Expression", "Statement", "Program", "Module"] {
103            if let Some(symbol_id) = self.find_symbol_by_name(name)
104                && self.rules.contains_key(&symbol_id)
105            {
106                return Some(symbol_id);
107            }
108        }
109
110        // Otherwise, use the first symbol that has rules and isn't a leaf/token
111        for (symbol_id, rules) in &self.rules {
112            // Skip symbols that look like internal/generated names
113            if let Some(name) = self.rule_names.get(symbol_id)
114                && !name.contains('_')
115                && !rules.is_empty()
116            {
117                return Some(*symbol_id);
118            }
119        }
120
121        // Final fallback: just use the first symbol with rules
122        self.rules.keys().next().copied()
123    }
124
125    /// Find a symbol by its name in rule_names
126    pub fn find_symbol_by_name(&self, name: &str) -> Option<SymbolId> {
127        for (symbol_id, symbol_name) in &self.rule_names {
128            if symbol_name == name {
129                return Some(*symbol_id);
130            }
131        }
132        None
133    }
134
135    /// Build or get the symbol registry
136    pub fn get_or_build_registry(&mut self) -> &SymbolRegistry {
137        if self.symbol_registry.is_none() {
138            self.symbol_registry = Some(self.build_registry());
139        }
140        // SAFETY: we just ensured `symbol_registry` is `Some` above
141        self.symbol_registry
142            .as_ref()
143            .expect("symbol_registry was just initialized above")
144    }
145
146    /// Check for empty string terminals (separate from main validate)
147    #[must_use = "validation result must be checked"]
148    pub fn check_empty_terminals(&self) -> Result<(), Vec<String>> {
149        let mut errors = Vec::new();
150
151        // Check for empty string terminals
152        for (id, token) in &self.tokens {
153            match &token.pattern {
154                TokenPattern::String(s) if s.is_empty() => {
155                    errors.push(format!(
156                        "Token '{}' (id={}) has empty string pattern",
157                        token.name, id.0
158                    ));
159                }
160                TokenPattern::Regex(r) if r.is_empty() => {
161                    errors.push(format!(
162                        "Token '{}' (id={}) has empty regex pattern",
163                        token.name, id.0
164                    ));
165                }
166                _ => {}
167            }
168        }
169
170        if errors.is_empty() {
171            Ok(())
172        } else {
173            Err(errors)
174        }
175    }
176
177    /// Build a new symbol registry from the grammar
178    pub fn build_registry(&self) -> SymbolRegistry {
179        let mut registry = SymbolRegistry::new();
180
181        // Sort tokens deterministically: underscore-prefixed last
182        let mut token_entries: Vec<_> = self.tokens.iter().collect();
183        token_entries.sort_by_key(|(_id, token)| {
184            let name = &token.name;
185            (name.starts_with('_'), name.clone())
186        });
187
188        // Register all tokens
189        for (symbol_id, token) in token_entries {
190            let metadata = SymbolMetadata {
191                visible: !token.name.starts_with('_'),
192                named: false,
193                hidden: self.extras.contains(symbol_id),
194                terminal: true,
195            };
196            registry.register(&token.name, metadata);
197        }
198
199        // Sort non-terminals deterministically
200        let mut rule_entries: Vec<_> = self.rule_names.iter().collect();
201        rule_entries.sort_by_key(|(_, name)| (*name).clone());
202
203        // Register all non-terminals
204        for (symbol_id, name) in rule_entries {
205            if !self.tokens.contains_key(symbol_id) {
206                let metadata = SymbolMetadata {
207                    visible: !name.starts_with('_'),
208                    named: true,
209                    hidden: name.starts_with('_'),
210                    terminal: false,
211                };
212                registry.register(name, metadata);
213            }
214        }
215
216        // Register externals
217        for external in &self.externals {
218            let metadata = SymbolMetadata {
219                visible: true,
220                named: false,
221                hidden: false,
222                terminal: true,
223            };
224            registry.register(&external.name, metadata);
225        }
226
227        registry
228    }
229}
230
/// Grammar rule supporting GLR multiple actions per state.
///
/// One production `lhs -> rhs`. Several rules may share the same `lhs`; `Grammar::rules`
/// groups them under that symbol.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Rule {
    /// Left-hand side symbol
    pub lhs: SymbolId,
    /// Right-hand side symbols (may contain nested `Symbol` forms until `normalize` runs)
    pub rhs: Vec<Symbol>,
    /// Precedence if specified
    pub precedence: Option<PrecedenceKind>,
    /// Associativity if specified
    pub associativity: Option<Associativity>,
    /// Field to position mapping
    pub fields: Vec<(FieldId, usize)>,
    /// Production ID
    pub production_id: ProductionId,
}
247
/// Precedence supporting both static and dynamic precedence (PREC_DYNAMIC).
///
/// Both variants carry a signed 16-bit level.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum PrecedenceKind {
    /// Static precedence
    Static(i16),
    /// Dynamic precedence
    Dynamic(i16),
}
256
/// Token with fragile flag for lexical vs parse conflicts
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Token {
    /// Token name (a leading underscore makes `build_registry` register it as not visible)
    pub name: String,
    /// Token pattern (string or regex); `check_empty_terminals` reports empty patterns
    pub pattern: TokenPattern,
    /// TSFragile flag for lexical vs parse conflicts
    pub fragile: bool,
}
267
/// Token pattern representation.
///
/// Both variants store the pattern text as a `String`; the variant records how that
/// text is meant to be interpreted.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum TokenPattern {
    /// String literal pattern
    String(String),
    /// Regular expression pattern (stored as source text)
    Regex(String),
}
276
/// Grammar symbol types.
///
/// `Terminal`, `NonTerminal`, `External` and `Epsilon` are leaf symbols. The remaining
/// variants nest other symbols and are expanded into auxiliary rules (or flattened) by
/// `Grammar::normalize`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub enum Symbol {
    /// Terminal symbol
    Terminal(SymbolId),
    /// Non-terminal symbol
    NonTerminal(SymbolId),
    /// External scanner symbol
    External(SymbolId),
    /// Optional symbol (zero or one)
    Optional(Box<Symbol>),
    /// Zero or more repetitions
    Repeat(Box<Symbol>),
    /// One or more repetitions
    RepeatOne(Box<Symbol>),
    /// Choice between symbols
    Choice(Vec<Symbol>),
    /// Sequence of symbols
    Sequence(Vec<Symbol>),
    /// Empty production
    Epsilon,
}
299
/// Alias sequence for node renaming (stored per production in `Grammar::alias_sequences`)
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct AliasSequence {
    /// Aliases for each position; `None` means the position has no alias
    pub aliases: Vec<Option<String>>,
}
306
/// Precedence declaration: a level and associativity shared by a set of symbols
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Precedence {
    /// Precedence level
    pub level: i16,
    /// Associativity for this level
    pub associativity: Associativity,
    /// Symbols at this precedence level
    pub symbols: Vec<SymbolId>,
}
317
/// Associativity for conflict resolution
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum Associativity {
    /// Left associative
    Left,
    /// Right associative
    Right,
    /// Non-associative (an enum variant of its own, unrelated to `Option::None`)
    None,
}
328
/// Conflict declaration for GLR handling: a set of symbols plus the chosen resolution
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ConflictDeclaration {
    /// Conflicting symbols
    pub symbols: Vec<SymbolId>,
    /// Conflict resolution strategy
    pub resolution: ConflictResolution,
}
337
/// How to resolve conflicts declared in a `ConflictDeclaration`
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum ConflictResolution {
    /// Resolve by precedence
    Precedence(PrecedenceKind),
    /// Resolve by associativity
    Associativity(Associativity),
    /// Allow GLR fork/merge
    GLR,
}
348
/// External token declaration
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ExternalToken {
    /// External token name
    pub name: String,
    /// Symbol ID for the external token; `Symbol::External` references are validated
    /// against this field
    pub symbol_id: SymbolId,
}
357
358// Type-safe IDs
/// Symbol identifier: a `u16` newtype used as the key of `Grammar::rules`,
/// `Grammar::tokens` and `Grammar::rule_names`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct SymbolId(pub u16);
362
/// Rule identifier: a `u16` newtype used as the key of `Grammar::production_ids`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct RuleId(pub u16);
366
/// State identifier (`u16` newtype).
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct StateId(pub u16);
370
371/// Field identifier
372#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
373pub struct FieldId(pub u16);
374
/// Production identifier: a `u16` newtype stored in `Rule::production_id` and used as
/// the key of `Grammar::alias_sequences`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct ProductionId(pub u16);
378
379// Display implementations for debugging
380impl fmt::Display for SymbolId {
381    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
382        write!(f, "Symbol({})", self.0)
383    }
384}
385
386impl fmt::Display for RuleId {
387    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
388        write!(f, "Rule({})", self.0)
389    }
390}
391
392impl fmt::Display for StateId {
393    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
394        write!(f, "State({})", self.0)
395    }
396}
397
398impl fmt::Display for FieldId {
399    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
400        write!(f, "Field({})", self.0)
401    }
402}
403
404impl fmt::Display for ProductionId {
405    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
406        write!(f, "Production({})", self.0)
407    }
408}
409
/// Metadata for a symbol in the language.
///
/// `Grammar::build_registry` fills one of these in for every token, non-terminal and
/// external it registers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct SymbolMetadata {
    /// Whether the symbol is visible
    pub visible: bool,
    /// Whether the symbol is named
    pub named: bool,
    /// Whether the symbol is hidden
    pub hidden: bool,
    /// Whether the symbol is a terminal
    pub terminal: bool,
}
422
423/// Grammar validation and processing
424impl Grammar {
425    /// Create a new empty grammar
426    pub fn new(name: String) -> Self {
427        Self {
428            name,
429            rules: IndexMap::new(),
430            tokens: IndexMap::new(),
431            precedences: Vec::new(),
432            conflicts: Vec::new(),
433            externals: Vec::new(),
434            extras: Vec::new(),
435            fields: IndexMap::new(),
436            supertypes: Vec::new(),
437            inline_rules: Vec::new(),
438            alias_sequences: IndexMap::new(),
439            production_ids: IndexMap::new(),
440            max_alias_sequence_length: 0,
441            rule_names: IndexMap::new(),
442            symbol_registry: None,
443        }
444    }
445
446    /// Extract IR from procedural macro data
447    #[must_use = "parsing result must be checked"]
448    pub fn from_macro_output(data: &str) -> Result<Self, GrammarError> {
449        // This will be implemented to parse the output from adze macros
450        serde_json::from_str(data).map_err(GrammarError::ParseError)
451    }
452
453    /// Helper to validate a symbol recursively
454    fn validate_symbol(&self, symbol: &Symbol) -> Result<(), GrammarError> {
455        match symbol {
456            Symbol::Terminal(id) | Symbol::NonTerminal(id) => {
457                if !self.rules.contains_key(id) && !self.tokens.contains_key(id) {
458                    return Err(GrammarError::UnresolvedSymbol(*id));
459                }
460            }
461            Symbol::External(id) => {
462                if !self.externals.iter().any(|ext| ext.symbol_id == *id) {
463                    return Err(GrammarError::UnresolvedExternalSymbol(*id));
464                }
465            }
466            Symbol::Optional(inner) | Symbol::Repeat(inner) | Symbol::RepeatOne(inner) => {
467                self.validate_symbol(inner)?;
468            }
469            Symbol::Choice(choices) => {
470                for s in choices {
471                    self.validate_symbol(s)?;
472                }
473            }
474            Symbol::Sequence(seq) => {
475                for s in seq {
476                    self.validate_symbol(s)?;
477                }
478            }
479            Symbol::Epsilon => {}
480        }
481        Ok(())
482    }
483
484    /// Validate grammar consistency and detect issues
485    #[must_use = "validation result must be checked"]
486    pub fn validate(&self) -> Result<(), GrammarError> {
487        // Validate field name ordering (must be lexicographic)
488        let mut field_names: Vec<_> = self.fields.values().collect();
489        field_names.sort();
490        let expected_order: Vec<_> = self.fields.values().collect();
491        if field_names != expected_order {
492            return Err(GrammarError::InvalidFieldOrdering);
493        }
494
495        // Validate symbol references
496        for rule in self.all_rules() {
497            for symbol in &rule.rhs {
498                self.validate_symbol(symbol)?;
499            }
500        }
501
502        Ok(())
503    }
504
    /// Apply grammar transformations for better table generation.
    ///
    /// Currently a placeholder: the body contains no statements, so calling it leaves
    /// the grammar unchanged.
    pub fn optimize(&mut self) {
        // Planned passes (none implemented yet):
        // Remove unused rules
        // Inline simple rules where beneficial
        // Optimize precedence declarations
        // This will be implemented based on Tree-sitter's optimization strategies
    }
512
513    /// Normalize complex symbols by creating auxiliary rules
514    /// This expands Optional, Repeat, Choice, etc. into standard rules
515    pub fn normalize(&mut self) -> Vec<Rule> {
516        let max_id = self.rules.keys().map(|id| id.0).max().unwrap_or(0);
517        let mut aux_counter = max_id + 1000; // Start auxiliary IDs well above existing ones
518
519        // We need to keep processing until no complex symbols remain
520        loop {
521            let mut found_complex = false;
522            let mut all_rules = Vec::new();
523
524            // Collect all current rules
525            for (_lhs, rules) in &self.rules {
526                for rule in rules {
527                    all_rules.push(rule.clone());
528                }
529            }
530
531            // Clear existing rules
532            self.rules.clear();
533
534            // Process each rule
535            for mut rule in all_rules {
536                let mut new_rhs = Vec::new();
537
538                for symbol in rule.rhs {
539                    match symbol {
540                        Symbol::Optional(inner) => {
541                            found_complex = true;
542                            // Create aux rule: aux -> inner | ε
543                            let aux_id = SymbolId(aux_counter);
544                            aux_counter += 1;
545
546                            // aux -> inner (recursively normalize the inner symbol)
547                            let inner_rule = Rule {
548                                lhs: aux_id,
549                                rhs: vec![*inner.clone()],
550                                precedence: None,
551                                associativity: None,
552                                fields: vec![],
553                                production_id: ProductionId(0),
554                            };
555                            self.add_rule(inner_rule);
556
557                            // aux -> ε
558                            self.add_rule(Rule {
559                                lhs: aux_id,
560                                rhs: vec![Symbol::Epsilon],
561                                precedence: None,
562                                associativity: None,
563                                fields: vec![],
564                                production_id: ProductionId(0),
565                            });
566
567                            new_rhs.push(Symbol::NonTerminal(aux_id));
568                        }
569                        Symbol::Repeat(inner) => {
570                            found_complex = true;
571                            // Create aux rule: aux -> aux inner | ε
572                            let aux_id = SymbolId(aux_counter);
573                            aux_counter += 1;
574
575                            // aux -> aux inner (recursively normalize)
576                            self.add_rule(Rule {
577                                lhs: aux_id,
578                                rhs: vec![Symbol::NonTerminal(aux_id), *inner.clone()],
579                                precedence: None,
580                                associativity: None,
581                                fields: vec![],
582                                production_id: ProductionId(0),
583                            });
584
585                            // aux -> ε
586                            self.add_rule(Rule {
587                                lhs: aux_id,
588                                rhs: vec![Symbol::Epsilon],
589                                precedence: None,
590                                associativity: None,
591                                fields: vec![],
592                                production_id: ProductionId(0),
593                            });
594
595                            new_rhs.push(Symbol::NonTerminal(aux_id));
596                        }
597                        Symbol::RepeatOne(inner) => {
598                            found_complex = true;
599                            // Create aux rule: aux -> aux inner | inner
600                            let aux_id = SymbolId(aux_counter);
601                            aux_counter += 1;
602
603                            // aux -> aux inner
604                            self.add_rule(Rule {
605                                lhs: aux_id,
606                                rhs: vec![Symbol::NonTerminal(aux_id), *inner.clone()],
607                                precedence: None,
608                                associativity: None,
609                                fields: vec![],
610                                production_id: ProductionId(0),
611                            });
612
613                            // aux -> inner
614                            self.add_rule(Rule {
615                                lhs: aux_id,
616                                rhs: vec![*inner],
617                                precedence: None,
618                                associativity: None,
619                                fields: vec![],
620                                production_id: ProductionId(0),
621                            });
622
623                            new_rhs.push(Symbol::NonTerminal(aux_id));
624                        }
625                        Symbol::Choice(choices) => {
626                            found_complex = true;
627                            // Create aux rules: aux -> choice1 | choice2 | ...
628                            let aux_id = SymbolId(aux_counter);
629                            aux_counter += 1;
630
631                            for choice in choices {
632                                self.add_rule(Rule {
633                                    lhs: aux_id,
634                                    rhs: vec![choice],
635                                    precedence: None,
636                                    associativity: None,
637                                    fields: vec![],
638                                    production_id: ProductionId(0),
639                                });
640                            }
641
642                            new_rhs.push(Symbol::NonTerminal(aux_id));
643                        }
644                        Symbol::Sequence(seq) => {
645                            found_complex = true;
646                            // Flatten sequence into the current rule
647                            new_rhs.extend(seq);
648                        }
649                        other => new_rhs.push(other),
650                    }
651                }
652
653                rule.rhs = new_rhs;
654                self.add_rule(rule);
655            }
656
657            // If no complex symbols were found in this iteration, we're done
658            if !found_complex {
659                break;
660            }
661        }
662
663        // Return all rules for compatibility (though caller probably doesn't need this)
664        self.rules.values().flatten().cloned().collect()
665    }
666}
667
/// Grammar processing errors
#[derive(Debug, thiserror::Error)]
pub enum GrammarError {
    /// Failed to parse grammar (returned by `Grammar::from_macro_output`; wraps the
    /// underlying `serde_json` error)
    #[error("Failed to parse grammar: {0}")]
    ParseError(#[from] serde_json::Error),

    /// Invalid field ordering (returned by `Grammar::validate` when the field names are
    /// not in lexicographic order)
    #[error("Invalid field ordering - fields must be in lexicographic order")]
    InvalidFieldOrdering,

    /// Unresolved symbol reference: a terminal or non-terminal ID with no entry in
    /// `rules` or `tokens`
    #[error("Unresolved symbol reference: {0}")]
    UnresolvedSymbol(SymbolId),

    /// Unresolved external symbol reference: an external ID with no matching entry in
    /// `externals`
    #[error("Unresolved external symbol reference: {0}")]
    UnresolvedExternalSymbol(SymbolId),

    /// Conflict in grammar
    #[error("Conflict in grammar: {0}")]
    ConflictError(String),

    /// Invalid precedence declaration
    #[error("Invalid precedence declaration: {0}")]
    InvalidPrecedence(String),
}
695
696#[cfg(test)]
697mod tests {
698    use super::*;
699
700    #[test]
701    fn test_grammar_creation() {
702        let grammar = Grammar::new("test".to_string());
703        assert_eq!(grammar.name, "test");
704        assert!(grammar.rules.is_empty());
705        assert!(grammar.tokens.is_empty());
706        assert!(grammar.precedences.is_empty());
707        assert!(grammar.conflicts.is_empty());
708        assert!(grammar.externals.is_empty());
709        assert!(grammar.fields.is_empty());
710        assert!(grammar.supertypes.is_empty());
711        assert!(grammar.inline_rules.is_empty());
712        assert!(grammar.alias_sequences.is_empty());
713        assert!(grammar.production_ids.is_empty());
714        assert_eq!(grammar.max_alias_sequence_length, 0);
715    }
716
717    #[test]
718    fn test_field_ordering_validation() {
719        let mut grammar = Grammar::new("test".to_string());
720
721        // Add fields in non-lexicographic order
722        grammar.fields.insert(FieldId(1), "zebra".to_string());
723        grammar.fields.insert(FieldId(0), "alpha".to_string());
724
725        // Validation should fail
726        assert!(grammar.validate().is_err());
727
728        // Fix the ordering
729        grammar.fields.clear();
730        grammar.fields.insert(FieldId(0), "alpha".to_string());
731        grammar.fields.insert(FieldId(1), "zebra".to_string());
732
733        // Validation should now pass
734        assert!(grammar.validate().is_ok());
735    }
736
737    #[test]
738    fn test_symbol_id_display() {
739        let symbol_id = SymbolId(42);
740        assert_eq!(format!("{}", symbol_id), "Symbol(42)");
741
742        let rule_id = RuleId(10);
743        assert_eq!(format!("{}", rule_id), "Rule(10)");
744
745        let state_id = StateId(5);
746        assert_eq!(format!("{}", state_id), "State(5)");
747
748        let field_id = FieldId(3);
749        assert_eq!(format!("{}", field_id), "Field(3)");
750
751        let production_id = ProductionId(7);
752        assert_eq!(format!("{}", production_id), "Production(7)");
753    }
754
755    #[test]
756    fn test_precedence_kinds() {
757        let static_prec = PrecedenceKind::Static(5);
758        let dynamic_prec = PrecedenceKind::Dynamic(10);
759
760        match static_prec {
761            PrecedenceKind::Static(level) => assert_eq!(level, 5),
762            _ => panic!("Expected static precedence"),
763        }
764
765        match dynamic_prec {
766            PrecedenceKind::Dynamic(level) => assert_eq!(level, 10),
767            _ => panic!("Expected dynamic precedence"),
768        }
769    }
770
771    #[test]
772    fn test_symbol_types() {
773        let terminal = Symbol::Terminal(SymbolId(1));
774        let non_terminal = Symbol::NonTerminal(SymbolId(2));
775        let external = Symbol::External(SymbolId(3));
776
777        match terminal {
778            Symbol::Terminal(SymbolId(1)) => {}
779            _ => panic!("Expected terminal symbol"),
780        }
781
782        match non_terminal {
783            Symbol::NonTerminal(SymbolId(2)) => {}
784            _ => panic!("Expected non-terminal symbol"),
785        }
786
787        match external {
788            Symbol::External(SymbolId(3)) => {}
789            _ => panic!("Expected external symbol"),
790        }
791
792        // Test equality and hashing
793        assert_eq!(terminal, Symbol::Terminal(SymbolId(1)));
794        assert_ne!(terminal, non_terminal);
795
796        let mut set = std::collections::HashSet::new();
797        set.insert(terminal.clone());
798        assert!(set.contains(&terminal));
799        assert!(!set.contains(&non_terminal));
800    }
801
802    #[test]
803    fn test_token_patterns() {
804        let string_pattern = TokenPattern::String("hello".to_string());
805        let regex_pattern = TokenPattern::Regex(r"\d+".to_string());
806
807        match string_pattern {
808            TokenPattern::String(s) => assert_eq!(s, "hello"),
809            _ => panic!("Expected string pattern"),
810        }
811
812        match regex_pattern {
813            TokenPattern::Regex(r) => assert_eq!(r, r"\d+"),
814            _ => panic!("Expected regex pattern"),
815        }
816    }
817
818    #[test]
819    fn test_associativity() {
820        let left = Associativity::Left;
821        let right = Associativity::Right;
822        let none = Associativity::None;
823
824        assert_eq!(left, Associativity::Left);
825        assert_eq!(right, Associativity::Right);
826        assert_eq!(none, Associativity::None);
827
828        assert_ne!(left, right);
829        assert_ne!(left, none);
830        assert_ne!(right, none);
831    }
832
833    #[test]
834    fn test_conflict_resolution() {
835        let precedence_resolution = ConflictResolution::Precedence(PrecedenceKind::Static(5));
836        let associativity_resolution = ConflictResolution::Associativity(Associativity::Left);
837        let glr_resolution = ConflictResolution::GLR;
838
839        match precedence_resolution {
840            ConflictResolution::Precedence(PrecedenceKind::Static(5)) => {}
841            _ => panic!("Expected precedence resolution"),
842        }
843
844        match associativity_resolution {
845            ConflictResolution::Associativity(Associativity::Left) => {}
846            _ => panic!("Expected associativity resolution"),
847        }
848
849        match glr_resolution {
850            ConflictResolution::GLR => {}
851            _ => panic!("Expected GLR resolution"),
852        }
853    }
854
855    #[test]
856    fn test_grammar_with_rules_and_tokens() {
857        let mut grammar = Grammar::new("test_grammar".to_string());
858
859        // Add a rule: S -> NUMBER
860        let rule = Rule {
861            lhs: SymbolId(0),                         // S
862            rhs: vec![Symbol::Terminal(SymbolId(1))], // NUMBER
863            precedence: Some(PrecedenceKind::Static(1)),
864            associativity: Some(Associativity::Left),
865            fields: vec![(FieldId(0), 0)],
866            production_id: ProductionId(0),
867        };
868        grammar.add_rule(rule);
869
870        // Add a token
871        let token = Token {
872            name: "NUMBER".to_string(),
873            pattern: TokenPattern::Regex(r"\d+".to_string()),
874            fragile: false,
875        };
876        grammar.tokens.insert(SymbolId(1), token);
877
878        // Add fields in correct order
879        grammar.fields.insert(FieldId(0), "left".to_string());
880        grammar.fields.insert(FieldId(1), "right".to_string());
881
882        // Validation should pass
883        match grammar.validate() {
884            Ok(_) => {}
885            Err(e) => panic!("Grammar validation failed: {:?}", e),
886        }
887
888        assert_eq!(grammar.rules.len(), 1);
889        assert_eq!(grammar.tokens.len(), 1);
890        assert_eq!(grammar.fields.len(), 2);
891    }
892
893    #[test]
894    fn test_grammar_validation_unresolved_symbol() {
895        let mut grammar = Grammar::new("test".to_string());
896
897        // Add a rule that references a non-existent symbol
898        let rule = Rule {
899            lhs: SymbolId(0),
900            rhs: vec![Symbol::Terminal(SymbolId(999))], // Non-existent symbol
901            precedence: None,
902            associativity: None,
903            fields: vec![],
904            production_id: ProductionId(0),
905        };
906        grammar.add_rule(rule);
907
908        // Validation should fail
909        assert!(grammar.validate().is_err());
910
911        match grammar.validate() {
912            Err(GrammarError::UnresolvedSymbol(SymbolId(999))) => {}
913            _ => panic!("Expected unresolved symbol error"),
914        }
915    }
916
917    #[test]
918    fn test_grammar_validation_unresolved_external() {
919        let mut grammar = Grammar::new("test".to_string());
920
921        // Add a rule that references a non-existent external symbol
922        let rule = Rule {
923            lhs: SymbolId(0),
924            rhs: vec![Symbol::External(SymbolId(999))], // Non-existent external
925            precedence: None,
926            associativity: None,
927            fields: vec![],
928            production_id: ProductionId(0),
929        };
930        grammar.add_rule(rule);
931
932        // Validation should fail
933        assert!(grammar.validate().is_err());
934
935        match grammar.validate() {
936            Err(GrammarError::UnresolvedExternalSymbol(SymbolId(999))) => {}
937            _ => panic!("Expected unresolved external symbol error"),
938        }
939    }
940
941    #[test]
942    fn test_alias_sequence() {
943        let alias_seq = AliasSequence {
944            aliases: vec![Some("alias1".to_string()), None, Some("alias2".to_string())],
945        };
946
947        assert_eq!(alias_seq.aliases.len(), 3);
948        assert_eq!(alias_seq.aliases[0], Some("alias1".to_string()));
949        assert_eq!(alias_seq.aliases[1], None);
950        assert_eq!(alias_seq.aliases[2], Some("alias2".to_string()));
951    }
952
953    #[test]
954    fn test_external_token() {
955        let external_token = ExternalToken {
956            name: "HERE_STRING".to_string(),
957            symbol_id: SymbolId(42),
958        };
959
960        assert_eq!(external_token.name, "HERE_STRING");
961        assert_eq!(external_token.symbol_id, SymbolId(42));
962    }
963
964    #[test]
965    fn test_precedence() {
966        let precedence = Precedence {
967            level: 10,
968            associativity: Associativity::Right,
969            symbols: vec![SymbolId(1), SymbolId(2), SymbolId(3)],
970        };
971
972        assert_eq!(precedence.level, 10);
973        assert_eq!(precedence.associativity, Associativity::Right);
974        assert_eq!(precedence.symbols.len(), 3);
975        assert!(precedence.symbols.contains(&SymbolId(2)));
976    }
977
978    #[test]
979    fn test_conflict_declaration() {
980        let conflict = ConflictDeclaration {
981            symbols: vec![SymbolId(1), SymbolId(2)],
982            resolution: ConflictResolution::GLR,
983        };
984
985        assert_eq!(conflict.symbols.len(), 2);
986        assert!(conflict.symbols.contains(&SymbolId(1)));
987        assert!(conflict.symbols.contains(&SymbolId(2)));
988
989        match conflict.resolution {
990            ConflictResolution::GLR => {}
991            _ => panic!("Expected GLR resolution"),
992        }
993    }
994}