1#![forbid(unsafe_op_in_unsafe_fn)]
3#![deny(private_interfaces)]
4#![cfg_attr(feature = "strict_api", deny(unreachable_pub))]
5#![cfg_attr(not(feature = "strict_api"), warn(unreachable_pub))]
6#![cfg_attr(feature = "strict_docs", warn(missing_docs))]
7#![cfg_attr(not(feature = "strict_docs"), allow(missing_docs))]
8#![allow(
10 clippy::ptr_arg,
11 clippy::explicit_counter_loop,
12 clippy::needless_range_loop,
13 clippy::unused_enumerate_index
14)]
15
16use adze_ir::*;
54use fixedbitset::FixedBitSet;
55use indexmap::IndexMap;
56use serde::{Deserialize, Serialize};
57use std::collections::{BTreeMap, BTreeSet};
58
59pub mod error;
61pub use error::Result as GlrResult;
63
64pub use GLRError as GlrError;
66
67pub mod conflict_inspection;
69
70pub use adze_ir::{Grammar, RuleId, StateId, SymbolId};
73
74pub mod prelude {
76 pub use crate::{FirstFollowSets, ParseTable, build_lr1_automaton};
77}
78
79#[doc(hidden)]
81pub mod advanced_conflict;
82#[doc(hidden)]
83pub mod conflict_resolution;
84#[doc(hidden)]
85pub mod conflict_visualizer;
86#[doc(hidden)]
87pub mod disambiguation;
88#[doc(hidden)]
89pub mod gss;
90#[doc(hidden)]
91pub mod gss_arena;
92#[doc(hidden)]
93pub mod parse_forest;
94
95pub mod driver;
96pub mod forest_view;
97pub mod stack;
98pub mod telemetry;
100pub mod ts_lexer;
102
103#[cfg(feature = "serialization")]
105pub mod serialization;
106
107#[cfg(any(feature = "glr_trace", feature = "debug_glr"))]
110#[macro_export]
111macro_rules! debug_trace {
112 ($($t:tt)*) => { eprintln!("[GLR] {}", format!($($t)*)); }
113}
114#[cfg(not(any(feature = "glr_trace", feature = "debug_glr")))]
115#[macro_export]
116macro_rules! debug_trace {
117 ($($t:tt)*) => {};
118}
119
120#[cfg(any(feature = "glr_trace", feature = "debug_glr"))]
122#[macro_export]
123macro_rules! glr_trace {
124 ($($t:tt)*) => { debug_trace!($($t)*); }
125}
126#[cfg(not(any(feature = "glr_trace", feature = "debug_glr")))]
127#[macro_export]
128macro_rules! glr_trace {
129 ($($t:tt)*) => { debug_trace!($($t)*); }
130}
131
132#[doc(hidden)]
133pub mod perf_optimizations;
134#[doc(hidden)]
135pub mod precedence_compare;
136#[doc(hidden)]
137pub mod symbol_comparison;
138#[doc(hidden)]
139pub mod version_info;
140
141pub mod lib_v2;
142
143#[cfg(any(test, feature = "test-api"))]
144pub mod test_helpers;
146
147#[cfg(test)]
148pub mod test_symbol_alloc;
150
151#[doc(hidden)]
152pub use advanced_conflict::{
153 ConflictAnalyzer, ConflictStats, PrecedenceDecision, PrecedenceResolver,
154};
155#[doc(hidden)]
156pub use conflict_resolution::{RuntimeConflictResolver, VecWrapperResolver};
157#[doc(hidden)]
158pub use conflict_visualizer::{ConflictVisualizer, generate_dot_graph};
159#[doc(hidden)]
160pub use gss::{GSSStats, GraphStructuredStack, StackNode};
161#[doc(hidden)]
162pub use parse_forest::{ForestNode, ParseError, ParseForest, ParseNode, ParseTree};
163#[doc(hidden)]
164pub use perf_optimizations::{ParseTableCache, PerfStats, StackDeduplicator, StackPool};
165#[doc(hidden)]
166pub use precedence_compare::{
167 PrecedenceComparison, PrecedenceInfo, StaticPrecedenceResolver, compare_precedences,
168};
169#[doc(hidden)]
170pub use symbol_comparison::{compare_symbols, compare_versions_with_symbols};
171#[doc(hidden)]
172pub use version_info::{CompareResult, VersionInfo, compare_versions};
173
174const EOF_SENTINEL: SymbolId = SymbolId(0);
188
189#[inline]
192fn map_follow_symbol(sym: SymbolId, eof_symbol: SymbolId) -> SymbolId {
193 if sym == EOF_SENTINEL { eof_symbol } else { sym }
194}
195
196#[derive(Copy, Clone, Debug, Eq, PartialEq)]
198enum Assoc {
199 Left,
200 Right,
201 None,
202}
203
204#[derive(Copy, Clone, Debug)]
205struct TokPrec {
206 prec: u8,
207 assoc: Assoc,
208}
209
210#[derive(Copy, Clone, Debug)]
211struct RulePrec {
212 prec: u8,
213 assoc: Assoc,
214}
215
216struct PrecTables {
217 tok_prec_by_index: Vec<Option<TokPrec>>,
219 rule_prec: Vec<RulePrec>,
221}
222
223fn build_prec_tables(
224 grammar: &Grammar,
225 symbol_to_index: &BTreeMap<SymbolId, usize>,
226 token_count: u32,
227 production_count: u32,
228) -> PrecTables {
229 use adze_ir::{Associativity, PrecedenceKind};
230
231 debug_assert!(production_count > 0, "production_count must be positive");
234
235 let mut tok_prec_by_index = vec![None; symbol_to_index.len()];
237 let tok_prec_len = tok_prec_by_index.len(); let mut set_tok_prec = |tok_idx: usize, new: TokPrec| {
241 if tok_idx >= tok_prec_by_index.len() {
242 return; }
244 tok_prec_by_index[tok_idx] = match tok_prec_by_index[tok_idx] {
245 None => Some(new),
246 Some(old) => Some(if new.prec > old.prec { new } else { old }),
247 };
248 };
249
250 let mut rule_prec = vec![
252 RulePrec {
253 prec: 0,
254 assoc: Assoc::None
255 };
256 production_count as usize
257 ];
258
259 for rules in grammar.rules.values() {
261 for rule in rules {
262 let pid = rule.production_id.0 as usize;
263 if pid >= production_count as usize {
265 continue;
266 }
267
268 let explicit = rule.precedence.and_then(|p| {
270 if let PrecedenceKind::Static(level) = p {
271 Some(level as u8)
272 } else {
273 None
274 }
275 });
276
277 let rule_assoc = rule
279 .associativity
280 .map(|assoc| match assoc {
281 Associativity::Left => Assoc::Left,
282 Associativity::Right => Assoc::Right,
283 Associativity::None => Assoc::None,
284 })
285 .unwrap_or(Assoc::None);
286
287 if let Some(level) = explicit {
290 let tok_idx_opt = rule.rhs.iter().rev().find_map(|sym| {
292 if let Symbol::Terminal(id) = sym {
293 symbol_to_index.get(id).copied()
294 } else {
295 None
296 }
297 });
298
299 if let Some(tok_idx) = tok_idx_opt
300 && tok_idx < tok_prec_len
301 {
302 set_tok_prec(
303 tok_idx,
304 TokPrec {
305 prec: level,
306 assoc: rule_assoc,
307 },
308 );
309 }
310 }
311
312 rule_prec[pid] = RulePrec {
314 prec: explicit.unwrap_or(0),
315 assoc: rule_assoc,
316 };
317 }
318 }
319
320 for rules in grammar.rules.values() {
322 for rule in rules {
323 let pid = rule.production_id.0 as usize;
324 if pid >= production_count as usize {
325 continue;
326 }
327
328 if rule_prec[pid].prec > 0 {
330 continue;
331 }
332
333 let derived = rule
335 .rhs
336 .iter()
337 .rev()
338 .find_map(|sym| {
339 if let Symbol::Terminal(id) = sym {
340 symbol_to_index.get(id).and_then(|&idx| {
341 if (idx as u32) < token_count {
342 tok_prec_by_index[idx]
343 } else {
344 None
345 }
346 })
347 } else {
348 None
349 }
350 })
351 .unwrap_or(TokPrec {
352 prec: 0,
353 assoc: Assoc::None,
354 });
355
356 rule_prec[pid] = RulePrec {
357 prec: derived.prec,
358 assoc: derived.assoc,
359 };
360 }
361 }
362
363 PrecTables {
364 tok_prec_by_index,
365 rule_prec,
366 }
367}
368
369#[derive(Copy, Clone, Debug, Eq, PartialEq)]
370enum PrecDecision {
371 PreferShift,
372 PreferReduce,
373 Error,
374 NoInfo,
375}
376
377#[inline]
378fn decide_with_precedence(
379 lookahead_tok_idx: usize, reduce_prod_id: u16, prec: &PrecTables,
382) -> PrecDecision {
383 if reduce_prod_id as usize >= prec.rule_prec.len() {
385 return PrecDecision::NoInfo;
386 }
387
388 let tokp = match prec
389 .tok_prec_by_index
390 .get(lookahead_tok_idx)
391 .and_then(|o| *o)
392 {
393 Some(p) => p,
394 None => return PrecDecision::NoInfo,
395 };
396 let rulep = prec.rule_prec[reduce_prod_id as usize];
397
398 if tokp.prec == 0 || rulep.prec == 0 {
400 return PrecDecision::NoInfo;
401 }
402
403 use core::cmp::Ordering::*;
404 match (tokp.prec.cmp(&rulep.prec), rulep.assoc) {
407 (Greater, _) => PrecDecision::PreferShift,
408 (Less, _) => PrecDecision::PreferReduce,
409 (Equal, Assoc::Left) => PrecDecision::PreferReduce, (Equal, Assoc::Right) => PrecDecision::PreferShift, (Equal, Assoc::None) => PrecDecision::Error,
412 }
413}
414
415#[inline]
417fn decide_reduce_reduce(a: u16, b: u16, prec: &PrecTables) -> u16 {
418 let pa = prec.rule_prec.get(a as usize).map(|r| r.prec).unwrap_or(0);
419 let pb = prec.rule_prec.get(b as usize).map(|r| r.prec).unwrap_or(0);
420 if pa > pb {
421 a
422 } else if pb > pa {
423 b
424 } else {
425 a.min(b)
426 }
427}
428
429pub use driver::Driver;
432pub use forest_view::{Forest, ForestView, Span};
434
435#[cfg(feature = "perf_counters")]
437#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
438pub mod perf {
439 use std::sync::atomic::{AtomicU64, Ordering};
440
441 #[derive(Clone, Debug, Default)]
443 pub struct Counters {
444 pub shifts: u64,
446 pub reductions: u64,
448 pub forks: u64,
450 pub merges: u64,
452 }
453
454 static SHIFTS: AtomicU64 = AtomicU64::new(0);
455 static REDUCTIONS: AtomicU64 = AtomicU64::new(0);
456 static FORKS: AtomicU64 = AtomicU64::new(0);
457 static MERGES: AtomicU64 = AtomicU64::new(0);
458
459 #[inline]
461 pub fn inc_shifts(n: u64) {
462 SHIFTS.fetch_add(n, Ordering::Relaxed);
463 }
464
465 #[inline]
467 pub fn inc_reductions(n: u64) {
468 REDUCTIONS.fetch_add(n, Ordering::Relaxed);
469 }
470
471 #[inline]
473 pub fn inc_forks(n: u64) {
474 FORKS.fetch_add(n, Ordering::Relaxed);
475 }
476
477 #[inline]
479 pub fn inc_merges(n: u64) {
480 MERGES.fetch_add(n, Ordering::Relaxed);
481 }
482
483 pub fn snapshot() -> Counters {
485 Counters {
486 shifts: SHIFTS.load(Ordering::Relaxed),
487 reductions: REDUCTIONS.load(Ordering::Relaxed),
488 forks: FORKS.load(Ordering::Relaxed),
489 merges: MERGES.load(Ordering::Relaxed),
490 }
491 }
492
493 pub fn take() -> Counters {
495 Counters {
496 shifts: SHIFTS.swap(0, Ordering::Relaxed),
497 reductions: REDUCTIONS.swap(0, Ordering::Relaxed),
498 forks: FORKS.swap(0, Ordering::Relaxed),
499 merges: MERGES.swap(0, Ordering::Relaxed),
500 }
501 }
502
503 pub fn reset() {
505 SHIFTS.store(0, Ordering::Relaxed);
506 REDUCTIONS.store(0, Ordering::Relaxed);
507 FORKS.store(0, Ordering::Relaxed);
508 MERGES.store(0, Ordering::Relaxed);
509 }
510}
511
512#[cfg(not(feature = "perf_counters"))]
514#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
515pub mod perf {
516 #[derive(Clone, Debug, Default)]
518 pub struct Counters {
519 pub shifts: u64,
521 pub reductions: u64,
523 pub forks: u64,
525 pub merges: u64,
527 }
528
529 #[inline(always)]
531 pub fn inc_shifts(_: u64) {}
532
533 #[inline(always)]
535 pub fn inc_reductions(_: u64) {}
536
537 #[inline(always)]
539 pub fn inc_forks(_: u64) {}
540
541 #[inline(always)]
543 pub fn inc_merges(_: u64) {}
544
545 #[inline(always)]
547 pub fn snapshot() -> Counters {
548 Counters::default()
549 }
550
551 #[inline(always)]
553 pub fn take() -> Counters {
554 Counters::default()
555 }
556
557 #[inline(always)]
559 pub fn reset() {}
560}
561
562#[derive(Debug, Clone)]
564pub struct FirstFollowSets {
565 first: IndexMap<SymbolId, FixedBitSet>,
566 follow: IndexMap<SymbolId, FixedBitSet>,
567 nullable: FixedBitSet,
568 #[allow(dead_code)]
569 symbol_count: usize,
570}
571
572impl FirstFollowSets {
573 fn get_max_symbol_id(symbol: &Symbol) -> u16 {
574 match symbol {
575 Symbol::Terminal(id) | Symbol::NonTerminal(id) | Symbol::External(id) => id.0,
576 Symbol::Optional(inner) | Symbol::Repeat(inner) | Symbol::RepeatOne(inner) => {
577 Self::get_max_symbol_id(inner)
578 }
579 Symbol::Choice(choices) => choices
580 .iter()
581 .map(Self::get_max_symbol_id)
582 .max()
583 .unwrap_or(0),
584 Symbol::Sequence(seq) => seq.iter().map(Self::get_max_symbol_id).max().unwrap_or(0),
585 Symbol::Epsilon => 0,
586 }
587 }
588
589 #[must_use = "computation result must be checked"]
618 pub fn compute_normalized(grammar: &mut Grammar) -> Result<Self, GLRError> {
619 grammar.normalize();
621
622 Self::compute(grammar)
624 }
625
626 #[must_use = "computation result must be checked"]
649 pub fn compute(grammar: &Grammar) -> Result<Self, GLRError> {
650 let normalized_grammar = {
652 let mut cloned = grammar.clone();
653 let _ = cloned.normalize(); cloned
655 };
656
657 let grammar = &normalized_grammar;
659 let max_rule_id = grammar.rules.keys().map(|id| id.0).max().unwrap_or(0);
661 let max_token_id = grammar.tokens.keys().map(|id| id.0).max().unwrap_or(0);
662 let max_external_id = grammar
663 .externals
664 .iter()
665 .map(|e| e.symbol_id.0)
666 .max()
667 .unwrap_or(0);
668
669 let mut max_rhs_id = 0u16;
671 for rules in grammar.rules.values() {
672 for rule in rules {
673 for symbol in &rule.rhs {
674 max_rhs_id = max_rhs_id.max(Self::get_max_symbol_id(symbol));
675 }
676 }
677 }
678
679 let symbol_count = (max_rule_id
680 .max(max_token_id)
681 .max(max_external_id)
682 .max(max_rhs_id)
683 + 2) as usize; let mut first = IndexMap::new();
686 let mut follow = IndexMap::new();
687 let mut nullable = FixedBitSet::with_capacity(symbol_count);
688
689 for &symbol_id in grammar.rules.keys().chain(grammar.tokens.keys()) {
691 first.insert(symbol_id, FixedBitSet::with_capacity(symbol_count));
692 follow.insert(symbol_id, FixedBitSet::with_capacity(symbol_count));
693 }
694
695 let mut changed = true;
697 while changed {
698 changed = false;
699
700 for rule in grammar.all_rules() {
701 let lhs = rule.lhs;
702 let mut rule_nullable = true;
703
704 for symbol in &rule.rhs {
705 match symbol {
706 Symbol::Terminal(id) => {
707 if let Some(first_set) = first.get_mut(&lhs)
708 && !first_set.contains(id.0 as usize)
709 {
710 first_set.insert(id.0 as usize);
711 changed = true;
712 }
713 rule_nullable = false;
714 break;
715 }
716 Symbol::NonTerminal(id) | Symbol::External(id) => {
717 if let Some(symbol_first) = first.get(id).cloned()
718 && let Some(lhs_first) = first.get_mut(&lhs)
719 {
720 let old_len = lhs_first.count_ones(..);
721 lhs_first.union_with(&symbol_first);
722 if lhs_first.count_ones(..) > old_len {
723 changed = true;
724 }
725 }
726
727 if !nullable.contains(id.0 as usize) {
728 rule_nullable = false;
729 break;
730 }
731 }
732 Symbol::Epsilon => {
733 }
736 Symbol::Optional(_)
737 | Symbol::Repeat(_)
738 | Symbol::RepeatOne(_)
739 | Symbol::Choice(_)
740 | Symbol::Sequence(_) => {
741 return Err(GLRError::ComplexSymbolsNotNormalized {
743 operation: "FIRST/FOLLOW computation".to_string(),
744 });
745 }
746 }
747 }
748
749 if rule_nullable && !nullable.contains(lhs.0 as usize) {
750 nullable.insert(lhs.0 as usize);
751 changed = true;
752 }
753 }
754 }
755
756 if let Some(start_symbol) = grammar.start_symbol()
759 && let Some(follow_set) = follow.get_mut(&start_symbol)
760 {
761 follow_set.insert(0); }
763
764 changed = true;
765 while changed {
766 changed = false;
767
768 for rule in grammar.all_rules() {
769 if rule.rhs.len() >= 2
771 && let (Symbol::NonTerminal(first_id), Symbol::NonTerminal(second_id)) =
772 (&rule.rhs[0], &rule.rhs[1])
773 && *first_id == rule.lhs
774 {
775 if let Some(first_of_second) = first.get(second_id)
778 && let Some(follow_set) = follow.get_mut(&rule.lhs)
779 {
780 let old_len = follow_set.count_ones(..);
781 follow_set.union_with(first_of_second);
782 if follow_set.count_ones(..) > old_len {
783 changed = true;
784 }
785 }
786 }
787
788 for (i, symbol) in rule.rhs.iter().enumerate() {
789 if let Symbol::NonTerminal(id) | Symbol::External(id) = symbol {
790 let remaining = &rule.rhs[i + 1..];
792 let first_of_remaining =
793 Self::first_of_sequence_static(remaining, &first, &nullable)?;
794
795 if let Some(follow_set) = follow.get_mut(id) {
796 let old_len = follow_set.count_ones(..);
797 follow_set.union_with(&first_of_remaining);
798 if follow_set.count_ones(..) > old_len {
799 changed = true;
800 }
801 }
802
803 if Self::sequence_is_nullable(remaining, &nullable)
805 && let Some(lhs_follow) = follow.get(&rule.lhs).cloned()
806 && let Some(follow_set) = follow.get_mut(id)
807 {
808 let old_len = follow_set.count_ones(..);
809 follow_set.union_with(&lhs_follow);
810 if follow_set.count_ones(..) > old_len {
811 changed = true;
812 }
813 }
814 }
815 }
816 }
817 }
818
819 Ok(Self {
820 first,
821 follow,
822 nullable,
823 symbol_count,
824 })
825 }
826
827 #[must_use = "computation result must be checked"]
829 pub fn first_of_sequence(&self, symbols: &[Symbol]) -> Result<FixedBitSet, GLRError> {
830 Self::first_of_sequence_static(symbols, &self.first, &self.nullable)
831 }
832
833 fn first_of_sequence_static(
834 symbols: &[Symbol],
835 first: &IndexMap<SymbolId, FixedBitSet>,
836 nullable: &FixedBitSet,
837 ) -> Result<FixedBitSet, GLRError> {
838 let mut result = FixedBitSet::with_capacity(nullable.len());
839
840 for symbol in symbols {
841 match symbol {
842 Symbol::Terminal(id) => {
843 result.insert(id.0 as usize);
844 break;
845 }
846 Symbol::Epsilon => {
847 }
849 Symbol::NonTerminal(id) | Symbol::External(id) => {
850 if let Some(symbol_first) = first.get(id) {
851 result.union_with(symbol_first);
852 }
853
854 if !nullable.contains(id.0 as usize) {
855 break;
856 }
857 }
858 Symbol::Optional(_)
859 | Symbol::Repeat(_)
860 | Symbol::RepeatOne(_)
861 | Symbol::Choice(_)
862 | Symbol::Sequence(_) => {
863 return Err(GLRError::ComplexSymbolsNotNormalized {
864 operation: "FIRST/FOLLOW computation".to_string(),
865 });
866 }
867 }
868 }
869
870 Ok(result)
871 }
872
873 fn sequence_is_nullable(symbols: &[Symbol], nullable: &FixedBitSet) -> bool {
874 symbols.iter().all(|symbol| match symbol {
875 Symbol::Terminal(_) => false,
876 Symbol::NonTerminal(id) | Symbol::External(id) => nullable.contains(id.0 as usize),
877 Symbol::Epsilon => true,
878 Symbol::Optional(_)
879 | Symbol::Repeat(_)
880 | Symbol::RepeatOne(_)
881 | Symbol::Choice(_)
882 | Symbol::Sequence(_) => {
883 panic!("Complex symbols should be normalized before FIRST/FOLLOW computation");
884 }
885 })
886 }
887
888 pub fn first(&self, symbol: SymbolId) -> Option<&FixedBitSet> {
890 self.first.get(&symbol)
891 }
892
893 pub fn follow(&self, symbol: SymbolId) -> Option<&FixedBitSet> {
895 self.follow.get(&symbol)
896 }
897
898 pub fn is_nullable(&self, symbol: SymbolId) -> bool {
900 self.nullable.contains(symbol.0 as usize)
901 }
902}
903
904#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
906pub struct LRItem {
907 pub rule_id: RuleId,
909 pub position: usize,
911 pub lookahead: SymbolId,
913}
914
915impl LRItem {
916 pub fn new(rule_id: RuleId, position: usize, lookahead: SymbolId) -> Self {
918 Self {
919 rule_id,
920 position,
921 lookahead,
922 }
923 }
924
925 pub fn is_reduce_item(&self, grammar: &Grammar) -> bool {
927 if let Some(rule) = grammar
928 .all_rules()
929 .find(|r| r.production_id.0 == self.rule_id.0)
930 {
931 if rule.rhs.len() == 1 && matches!(rule.rhs[0], Symbol::Epsilon) {
934 return true; }
936
937 self.position >= rule.rhs.len()
938 } else {
939 false
940 }
941 }
942
943 pub fn next_symbol<'a>(&self, grammar: &'a Grammar) -> Option<&'a Symbol> {
945 if let Some(rule) = grammar
946 .all_rules()
947 .find(|r| r.production_id.0 == self.rule_id.0)
948 {
949 rule.rhs.get(self.position)
950 } else {
951 None
952 }
953 }
954}
955
956#[derive(Debug, Clone, PartialEq, Eq)]
958pub struct ItemSet {
959 pub items: BTreeSet<LRItem>,
961 pub id: StateId,
963}
964
965impl ItemSet {
966 pub fn new(id: StateId) -> Self {
968 Self {
969 items: BTreeSet::new(),
970 id,
971 }
972 }
973
974 pub fn add_item(&mut self, item: LRItem) {
976 self.items.insert(item);
977 }
978
979 pub fn closure(
981 &mut self,
982 grammar: &Grammar,
983 first_follow: &FirstFollowSets,
984 ) -> Result<(), GLRError> {
985 let _initial_size = self.items.len();
986
987 let mut added = true;
988 let mut _iteration = 0;
989 while added {
990 added = false;
991 _iteration += 1;
992 let current_items: Vec<_> = self.items.iter().cloned().collect();
993
994 for item in current_items {
995 if let Some(Symbol::NonTerminal(symbol_id)) = item.next_symbol(grammar) {
996 if let Some(rules) = grammar.get_rules_for_symbol(*symbol_id) {
998 for rule in rules {
999 let mut beta = Vec::new();
1002 if let Some(current_rule) = grammar
1003 .all_rules()
1004 .find(|r| r.production_id.0 == item.rule_id.0)
1005 {
1006 beta.extend_from_slice(¤t_rule.rhs[item.position + 1..]);
1007 }
1008 beta.push(Symbol::Terminal(item.lookahead));
1009
1010 let first_beta_alpha = first_follow.first_of_sequence(&beta)?;
1011
1012 for lookahead_idx in first_beta_alpha.ones() {
1014 let new_item = LRItem::new(
1015 RuleId(rule.production_id.0),
1016 0,
1017 SymbolId(lookahead_idx as u16),
1018 );
1019
1020 if !self.items.contains(&new_item) {
1021 self.items.insert(new_item);
1022 added = true;
1023 if rule.rhs.is_empty() {
1024 }
1026 }
1027 }
1028 }
1029 }
1030 }
1031 }
1032 }
1033
1034 Ok(())
1036 }
1037
1038 pub fn goto(
1040 &self,
1041 symbol: &Symbol,
1042 grammar: &Grammar,
1043 _first_follow: &FirstFollowSets,
1044 ) -> ItemSet {
1045 let mut new_set = ItemSet::new(StateId(0)); for item in &self.items {
1049 if let Some(next_sym) = item.next_symbol(grammar)
1050 && std::mem::discriminant(next_sym) == std::mem::discriminant(symbol)
1051 {
1052 match (next_sym, symbol) {
1053 (Symbol::Terminal(a), Symbol::Terminal(b))
1054 | (Symbol::NonTerminal(a), Symbol::NonTerminal(b))
1055 | (Symbol::External(a), Symbol::External(b))
1056 if a == b =>
1057 {
1058 let new_item = LRItem::new(item.rule_id, item.position + 1, item.lookahead);
1059 new_set.add_item(new_item);
1060 }
1061 _ => {}
1062 }
1063 }
1064 }
1065
1066 let _ = new_set.closure(grammar, _first_follow);
1068 new_set
1069 }
1070}
1071
1072#[derive(Debug, Clone)]
1074#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
1075pub struct ItemSetCollection {
1076 pub sets: Vec<ItemSet>,
1078 pub goto_table: IndexMap<(StateId, SymbolId), StateId>,
1080 pub symbol_is_terminal: IndexMap<SymbolId, bool>,
1082}
1083
1084impl ItemSetCollection {
1085 pub fn build_canonical_collection_augmented(
1087 grammar: &Grammar,
1088 first_follow: &FirstFollowSets,
1089 augmented_start: SymbolId,
1090 _original_start: SymbolId,
1091 eof_symbol: SymbolId,
1092 ) -> Self {
1093 let mut collection = Self {
1094 sets: Vec::new(),
1095 goto_table: IndexMap::new(),
1096 symbol_is_terminal: IndexMap::new(),
1097 };
1098
1099 let mut initial_set = ItemSet::new(StateId(0));
1101
1102 if let Some(augmented_rules) = grammar.get_rules_for_symbol(augmented_start) {
1104 for rule in augmented_rules {
1105 let start_item = LRItem::new(
1107 RuleId(rule.production_id.0),
1108 0,
1109 eof_symbol, );
1111 initial_set.add_item(start_item);
1112 }
1113 }
1114
1115 let _ = initial_set.closure(grammar, first_follow);
1117 debug_trace!(
1118 "Initial state 0 after closure has {} items:",
1119 initial_set.items.len()
1120 );
1121
1122 let mut expected_terminals = std::collections::BTreeSet::new();
1124 let mut expected_nonterminals = std::collections::BTreeSet::new();
1125
1126 for item in &initial_set.items {
1127 if let Some(rule) = grammar
1129 .all_rules()
1130 .find(|r| r.production_id.0 == item.rule_id.0)
1131 {
1132 let mut rhs_str = String::new();
1133 for (idx, sym) in rule.rhs.iter().enumerate() {
1134 if idx == item.position {
1135 rhs_str.push_str(" • ");
1136 }
1137 match sym {
1138 Symbol::Terminal(id) => rhs_str.push_str(&format!("T({}) ", id.0)),
1139 Symbol::NonTerminal(id) => rhs_str.push_str(&format!("NT({}) ", id.0)),
1140 _ => rhs_str.push_str("? "),
1141 }
1142 }
1143 if item.position == rule.rhs.len() {
1144 rhs_str.push_str(" • ");
1145 }
1146 debug_trace!(
1147 " Item: NT({}) -> {}, lookahead={}",
1148 rule.lhs.0,
1149 rhs_str,
1150 item.lookahead.0
1151 );
1152
1153 if item.position < rule.rhs.len() {
1155 match &rule.rhs[item.position] {
1156 Symbol::Terminal(t) => {
1157 expected_terminals.insert(*t);
1158 }
1159 Symbol::NonTerminal(nt) => {
1160 expected_nonterminals.insert(*nt);
1161 }
1162 _ => {}
1163 }
1164 }
1165 }
1166 }
1167
1168 debug_trace!("State 0 expects transitions for:");
1169 debug_trace!(" Terminals: {:?}", expected_terminals);
1170 debug_trace!(" Nonterminals: {:?}", expected_nonterminals);
1171
1172 collection.sets.push(initial_set);
1173 let mut state_counter = 1;
1174
1175 let mut i = 0;
1177 while i < collection.sets.len() {
1178 let current_set = collection.sets[i].clone();
1179
1180 for item in ¤t_set.items {
1182 if let Some(rule) = grammar
1183 .all_rules()
1184 .find(|r| r.production_id.0 == item.rule_id.0)
1185 {
1186 let mut rhs_str = String::new();
1187 for (idx, sym) in rule.rhs.iter().enumerate() {
1188 if idx == item.position {
1189 rhs_str.push_str(" • ");
1190 }
1191 rhs_str.push_str(&format!("{:?} ", sym));
1192 }
1193 if item.position == rule.rhs.len() {
1194 rhs_str.push_str(" • ");
1195 }
1196 }
1198 }
1199
1200 let mut symbols = BTreeSet::new();
1202 let mut _terminal_count = 0;
1203 let mut _non_terminal_count = 0;
1204 if i == 0 {
1205 debug_trace!("\n=== State 0 Analysis ===");
1206 debug_trace!("State 0 has {} items:", current_set.items.len());
1207 }
1208 for (_idx, item) in current_set.items.iter().enumerate() {
1209 if i == 0 {
1210 if let Some(rule) = grammar
1212 .all_rules()
1213 .find(|r| r.production_id.0 == item.rule_id.0)
1214 {
1215 let mut item_str = String::new();
1216 item_str.push_str(&format!("NT({}) -> ", rule.lhs.0));
1217 for (pos, sym) in rule.rhs.iter().enumerate() {
1218 if pos == item.position {
1219 item_str.push_str("• ");
1220 }
1221 match sym {
1222 Symbol::Terminal(t) => item_str.push_str(&format!("T({}) ", t.0)),
1223 Symbol::NonTerminal(nt) => {
1224 item_str.push_str(&format!("NT({}) ", nt.0))
1225 }
1226 Symbol::External(e) => item_str.push_str(&format!("EXT({}) ", e.0)),
1227 _ => item_str.push_str(&format!("{:?} ", sym)),
1228 }
1229 }
1230 if item.position == rule.rhs.len() {
1231 item_str.push_str("• ");
1232 }
1233 debug_trace!(" Item {}: {} (rule_id={})", _idx, item_str, item.rule_id.0);
1234 }
1235 }
1236
1237 if let Some(symbol) = item.next_symbol(grammar) {
1238 match symbol {
1239 Symbol::Terminal(_id) => {
1240 _terminal_count += 1;
1241 }
1242 Symbol::NonTerminal(_id) => {
1243 _non_terminal_count += 1;
1244 }
1245 Symbol::External(_id) => {
1246 _terminal_count += 1; }
1248 _ => {}
1249 }
1250 symbols.insert(symbol.clone());
1251 if i == 0 {
1252 debug_trace!(" -> next symbol: {:?}", symbol);
1253 }
1254 }
1255 }
1256
1257 if i == 0 {
1258 debug_trace!("\nState 0 summary:");
1259 debug_trace!(" Total symbols that can be shifted: {}", symbols.len());
1260 debug_trace!(" Terminals: {}", _terminal_count);
1261 debug_trace!(" Non-terminals: {}", _non_terminal_count);
1262 debug_trace!(" Symbols: {:?}\n", symbols);
1263 }
1264
1265 for symbol in symbols {
1268 let goto_set = current_set.goto(&symbol, grammar, first_follow);
1269
1270 if !goto_set.items.is_empty() {
1271 let existing_state = collection
1273 .sets
1274 .iter()
1275 .find(|set| set.items == goto_set.items)
1276 .map(|set| set.id);
1277
1278 let target_state = if let Some(existing_id) = existing_state {
1279 existing_id
1280 } else {
1281 let new_id = StateId(state_counter);
1283 let mut new_set = goto_set;
1284 new_set.id = new_id;
1285 collection.sets.push(new_set);
1286 state_counter += 1;
1287 new_id
1288 };
1289
1290 let symbol_id = match symbol {
1292 Symbol::Terminal(id) | Symbol::NonTerminal(id) | Symbol::External(id) => id,
1293 Symbol::Optional(_)
1294 | Symbol::Repeat(_)
1295 | Symbol::RepeatOne(_)
1296 | Symbol::Choice(_)
1297 | Symbol::Sequence(_)
1298 | Symbol::Epsilon => {
1299 panic!(
1300 "Complex symbols should be normalized before LR item generation"
1301 );
1302 }
1303 };
1304 if current_set.id.0 == 0 {
1305 debug_trace!(
1306 " State 0 GOTO: symbol {:?} -> state {}",
1307 symbol_id,
1308 target_state.0
1309 );
1310 }
1311 collection
1312 .goto_table
1313 .insert((current_set.id, symbol_id), target_state);
1314
1315 let is_terminal = matches!(symbol, Symbol::Terminal(_) | Symbol::External(_));
1317 collection.symbol_is_terminal.insert(symbol_id, is_terminal);
1318 }
1320 }
1321
1322 i += 1;
1323 }
1324
1325 collection
1326 }
1327
1328 pub fn build_canonical_collection(grammar: &Grammar, first_follow: &FirstFollowSets) -> Self {
1351 let mut collection = Self {
1352 sets: Vec::new(),
1353 goto_table: IndexMap::new(),
1354 symbol_is_terminal: IndexMap::new(),
1355 };
1356
1357 let mut initial_set = ItemSet::new(StateId(0));
1359
1360 if let Some(start_symbol) = grammar.start_symbol() {
1362 if let Some(start_rules) = grammar.get_rules_for_symbol(start_symbol) {
1366 for rule in start_rules.iter() {
1367 let start_item = LRItem::new(
1369 RuleId(rule.production_id.0),
1370 0,
1371 SymbolId(0), );
1373 initial_set.add_item(start_item);
1374 }
1376 }
1377
1378 let _ = initial_set.closure(grammar, first_follow);
1380 }
1381
1382 if initial_set.items.is_empty() {
1384 } else {
1386 for _item in &initial_set.items {
1387 }
1389 }
1390
1391 collection.sets.push(initial_set);
1392 let mut state_counter = 1;
1393
1394 let mut i = 0;
1396 while i < collection.sets.len() {
1397 let current_set = collection.sets[i].clone();
1398
1399 for item in ¤t_set.items {
1401 if let Some(rule) = grammar
1402 .all_rules()
1403 .find(|r| r.production_id.0 == item.rule_id.0)
1404 {
1405 let mut rhs_str = String::new();
1406 for (idx, sym) in rule.rhs.iter().enumerate() {
1407 if idx == item.position {
1408 rhs_str.push_str(" • ");
1409 }
1410 rhs_str.push_str(&format!("{:?} ", sym));
1411 }
1412 if item.position == rule.rhs.len() {
1413 rhs_str.push_str(" • ");
1414 }
1415 }
1417 }
1418
1419 let mut symbols = BTreeSet::new();
1421 let mut _terminal_count = 0;
1422 let mut _non_terminal_count = 0;
1423 if i == 0 {
1424 debug_trace!("\n=== State 0 Analysis ===");
1425 debug_trace!("State 0 has {} items:", current_set.items.len());
1426 }
1427 for (_idx, item) in current_set.items.iter().enumerate() {
1428 if i == 0 {
1429 if let Some(rule) = grammar
1431 .all_rules()
1432 .find(|r| r.production_id.0 == item.rule_id.0)
1433 {
1434 let mut item_str = String::new();
1435 item_str.push_str(&format!("NT({}) -> ", rule.lhs.0));
1436 for (pos, sym) in rule.rhs.iter().enumerate() {
1437 if pos == item.position {
1438 item_str.push_str("• ");
1439 }
1440 match sym {
1441 Symbol::Terminal(t) => item_str.push_str(&format!("T({}) ", t.0)),
1442 Symbol::NonTerminal(nt) => {
1443 item_str.push_str(&format!("NT({}) ", nt.0))
1444 }
1445 Symbol::External(e) => item_str.push_str(&format!("EXT({}) ", e.0)),
1446 _ => item_str.push_str(&format!("{:?} ", sym)),
1447 }
1448 }
1449 if item.position == rule.rhs.len() {
1450 item_str.push_str("• ");
1451 }
1452 debug_trace!(" Item {}: {} (rule_id={})", _idx, item_str, item.rule_id.0);
1453 }
1454 }
1455
1456 if let Some(symbol) = item.next_symbol(grammar) {
1457 match symbol {
1458 Symbol::Terminal(_id) => {
1459 _terminal_count += 1;
1460 }
1461 Symbol::NonTerminal(_id) => {
1462 _non_terminal_count += 1;
1463 }
1464 Symbol::External(_id) => {
1465 _terminal_count += 1; }
1467 _ => {}
1468 }
1469 symbols.insert(symbol.clone());
1470 if i == 0 {
1471 debug_trace!(" -> next symbol: {:?}", symbol);
1472 }
1473 }
1474 }
1475
1476 if i == 0 {
1477 debug_trace!("\nState 0 summary:");
1478 debug_trace!(" Total symbols that can be shifted: {}", symbols.len());
1479 debug_trace!(" Terminals: {}", _terminal_count);
1480 debug_trace!(" Non-terminals: {}", _non_terminal_count);
1481 debug_trace!(" Symbols: {:?}\n", symbols);
1482 }
1483
1484 for item in ¤t_set.items {
1486 if let Some(symbol) = item.next_symbol(grammar) {
1487 let _symbol_id = match &symbol {
1488 Symbol::Terminal(id) | Symbol::NonTerminal(id) | Symbol::External(id) => id,
1489 _ => panic!("Complex symbol"),
1490 };
1491 }
1493 }
1494
1495 for symbol in &symbols {
1496 let _symbol_id = match symbol {
1497 Symbol::Terminal(id) | Symbol::NonTerminal(id) | Symbol::External(id) => id,
1498 _ => panic!("Complex symbol"),
1499 };
1500 }
1501
1502 for symbol in symbols {
1504 let goto_set = current_set.goto(&symbol, grammar, first_follow);
1505
1506 if !goto_set.items.is_empty() {
1507 let existing_state = collection
1509 .sets
1510 .iter()
1511 .find(|set| set.items == goto_set.items)
1512 .map(|set| set.id);
1513
1514 let target_state = if let Some(existing_id) = existing_state {
1515 existing_id
1516 } else {
1517 let new_id = StateId(state_counter);
1519 let mut new_set = goto_set;
1520 new_set.id = new_id;
1521 collection.sets.push(new_set);
1522 state_counter += 1;
1523 new_id
1524 };
1525
1526 let symbol_id = match symbol {
1528 Symbol::Terminal(id) | Symbol::NonTerminal(id) | Symbol::External(id) => id,
1529 Symbol::Optional(_)
1530 | Symbol::Repeat(_)
1531 | Symbol::RepeatOne(_)
1532 | Symbol::Choice(_)
1533 | Symbol::Sequence(_)
1534 | Symbol::Epsilon => {
1535 panic!(
1536 "Complex symbols should be normalized before LR item generation"
1537 );
1538 }
1539 };
1540 if current_set.id.0 == 0 {
1541 debug_trace!(
1542 " State 0 GOTO: symbol {:?} -> state {}",
1543 symbol_id,
1544 target_state.0
1545 );
1546 }
1547 collection
1548 .goto_table
1549 .insert((current_set.id, symbol_id), target_state);
1550
1551 let is_terminal = matches!(symbol, Symbol::Terminal(_) | Symbol::External(_));
1553 collection.symbol_is_terminal.insert(symbol_id, is_terminal);
1554 }
1556 }
1557
1558 i += 1;
1559 }
1560
1561 collection
1562 }
1563}
1564
1565#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
1567#[cfg_attr(feature = "serialization", derive(Serialize, Deserialize))]
1568pub struct LexMode {
1569 pub lex_state: u16,
1571 pub external_lex_state: u16,
1573}
1574
1575#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1577#[cfg_attr(feature = "serialization", derive(Serialize, Deserialize))]
1578pub enum GotoIndexing {
1579 NonterminalMap,
1581 DirectSymbolId,
1583}
1584
1585#[derive(Debug, Clone)]
1587#[cfg_attr(feature = "serialization", derive(Serialize, Deserialize))]
1588#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
1589pub struct ParseTable {
1590 pub action_table: Vec<Vec<ActionCell>>,
1592 pub goto_table: Vec<Vec<StateId>>,
1594 pub symbol_metadata: Vec<SymbolMetadata>,
1596 pub state_count: usize,
1598 pub symbol_count: usize,
1600 pub symbol_to_index: BTreeMap<SymbolId, usize>,
1602 pub index_to_symbol: Vec<SymbolId>,
1604 pub external_scanner_states: Vec<Vec<bool>>,
1606 pub rules: Vec<ParseRule>,
1608 pub nonterminal_to_index: BTreeMap<SymbolId, usize>,
1610 pub goto_indexing: GotoIndexing,
1612 pub eof_symbol: SymbolId,
1614 pub start_symbol: SymbolId,
1616 pub grammar: Grammar,
1618 pub initial_state: StateId,
1620 pub token_count: usize,
1622 pub external_token_count: usize,
1624 pub lex_modes: Vec<LexMode>,
1626 pub extras: Vec<SymbolId>,
1628 pub dynamic_prec_by_rule: Vec<i16>,
1630 pub rule_assoc_by_rule: Vec<i8>,
1632 pub alias_sequences: Vec<Vec<Option<SymbolId>>>,
1634 pub field_names: Vec<String>,
1636 pub field_map: BTreeMap<(RuleId, u16), u16>,
1638}
1639
1640#[derive(Debug, Clone)]
1642#[cfg_attr(feature = "serialization", derive(Serialize, Deserialize))]
1643#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
1644pub struct ParseRule {
1645 pub lhs: SymbolId,
1647 pub rhs_len: u16,
1649}
1650
1651impl Default for ParseTable {
1652 fn default() -> Self {
1653 Self {
1654 action_table: vec![],
1655 goto_table: vec![],
1656 symbol_metadata: vec![],
1657 state_count: 0,
1658 symbol_count: 0,
1659 symbol_to_index: BTreeMap::new(),
1660 index_to_symbol: vec![],
1661 external_scanner_states: vec![],
1662 rules: vec![],
1663 nonterminal_to_index: BTreeMap::new(),
1664 goto_indexing: GotoIndexing::NonterminalMap,
1665 eof_symbol: SymbolId(0),
1666 start_symbol: SymbolId(0),
1667 grammar: Grammar::new("default".to_string()),
1668 initial_state: StateId(0),
1669 token_count: 0,
1670 external_token_count: 0,
1671 lex_modes: vec![],
1672 extras: vec![],
1673 dynamic_prec_by_rule: vec![],
1674 rule_assoc_by_rule: vec![],
1675 alias_sequences: vec![],
1676 field_names: vec![],
1677 field_map: BTreeMap::new(),
1678 }
1679 }
1680}
1681
1682impl ParseTable {
1683 pub fn with_detected_goto_indexing(mut self) -> Self {
1685 self.detect_goto_indexing();
1686 self
1687 }
1688
1689 pub fn normalize_eof_to_zero(mut self) -> Self {
1692 if self.eof_symbol == SymbolId(0) {
1694 return self;
1695 }
1696
1697 let old_eof = self.eof_symbol;
1698 #[cfg(debug_assertions)]
1700 debug_trace!("Normalizing EOF from {:?} to SymbolId(0)", old_eof);
1701
1702 let old_idx = self.symbol_to_index.get(&old_eof).copied();
1704 let zero_idx = self.symbol_to_index.get(&SymbolId(0)).copied();
1705
1706 if let (Some(old_idx), Some(zero_idx)) = (old_idx, zero_idx) {
1708 for row in &mut self.action_table {
1709 if old_idx < row.len() && zero_idx < row.len() {
1710 row.swap(old_idx, zero_idx);
1711 }
1712 }
1713 self.symbol_to_index.insert(SymbolId(0), old_idx);
1715 self.symbol_to_index.remove(&old_eof);
1716
1717 if old_idx < self.index_to_symbol.len() {
1719 self.index_to_symbol[old_idx] = SymbolId(0);
1720 }
1721 } else if let Some(old_idx) = old_idx {
1722 self.symbol_to_index.remove(&old_eof);
1724 self.symbol_to_index.insert(SymbolId(0), old_idx);
1725
1726 if old_idx < self.index_to_symbol.len() {
1728 self.index_to_symbol[old_idx] = SymbolId(0);
1729 }
1730 } else {
1731 self.symbol_to_index.insert(SymbolId(0), 0);
1733 }
1734
1735 self.eof_symbol = SymbolId(0);
1737 self
1738 }
1739
1740 pub fn detect_goto_indexing(&mut self) {
1742 let start_nt = self.start_symbol;
1744
1745 let col_map = self
1747 .nonterminal_to_index
1748 .get(&start_nt)
1749 .and_then(|&c| self.goto_table.first().and_then(|row| row.get(c)))
1750 .copied();
1751
1752 let col_direct = self
1754 .goto_table
1755 .first()
1756 .and_then(|row| row.get(start_nt.0 as usize))
1757 .copied();
1758
1759 self.goto_indexing = match (col_map, col_direct) {
1760 (Some(s), _) if s.0 != 0 => GotoIndexing::NonterminalMap,
1761 (_, Some(s)) if s.0 != 0 => GotoIndexing::DirectSymbolId,
1762 _ => GotoIndexing::NonterminalMap,
1764 };
1765 }
1766
1767 #[inline]
1769 pub fn terminal_boundary(&self) -> usize {
1770 self.token_count + self.external_token_count
1771 }
1772
1773 #[inline]
1775 pub fn is_terminal(&self, sym: SymbolId) -> bool {
1776 (sym.0 as usize) < self.terminal_boundary()
1777 }
1778
1779 pub fn valid_symbols(&self, state: StateId) -> Vec<bool> {
1781 let n = self.terminal_boundary();
1782 let mut v = vec![false; n];
1783 let s = state.0 as usize;
1784 if s < self.action_table.len() {
1785 for t in 0..n.min(self.action_table[s].len()) {
1786 v[t] = !self.action_table[s][t].is_empty();
1787 }
1788 }
1789 v
1790 }
1791
1792 #[inline]
1820 pub fn actions(&self, state: StateId, sym: SymbolId) -> &'_ [Action] {
1821 let s = state.0 as usize;
1822 let Some(&col) = self.symbol_to_index.get(&sym) else {
1823 return &[];
1824 };
1825 if s >= self.action_table.len() || col >= self.action_table[s].len() {
1826 return &[];
1827 }
1828 &self.action_table[s][col]
1829 }
1830
1831 #[inline]
1859 pub fn goto(&self, state: StateId, nt: SymbolId) -> Option<StateId> {
1860 let s = state.0 as usize;
1861 let &col = self.nonterminal_to_index.get(&nt)?;
1862 let ns = *self.goto_table.get(s)?.get(col)?;
1864 (ns.0 != u16::MAX).then_some(ns)
1865 }
1866
1867 #[inline]
1869 pub fn rule(&self, id: RuleId) -> (SymbolId, u16) {
1870 let r = &self.rules[id.0 as usize];
1871 (r.lhs, r.rhs_len)
1872 }
1873
1874 #[inline]
1876 pub fn eof(&self) -> SymbolId {
1877 self.eof_symbol
1878 }
1879
1880 #[inline]
1882 pub fn start_symbol(&self) -> SymbolId {
1883 self.start_symbol
1884 }
1885
1886 #[inline]
1888 pub fn grammar(&self) -> &Grammar {
1889 &self.grammar
1890 }
1891
1892 #[inline]
1894 pub fn error_symbol(&self) -> SymbolId {
1895 SymbolId(0)
1898 }
1899
1900 #[inline]
1902 pub fn valid_symbols_mask(&self, state: StateId) -> Vec<bool> {
1903 let n = self.terminal_boundary();
1904 let mut v = vec![false; n];
1905 let s = state.0 as usize;
1906 if s < self.action_table.len() {
1907 for t in 0..n.min(self.action_table[s].len()) {
1908 v[t] = !self.action_table[s][t].is_empty();
1909 }
1910 }
1911 v
1912 }
1913
1914 #[inline]
1916 pub fn lex_mode(&self, state: StateId) -> LexMode {
1917 let idx = state.0 as usize;
1918 if idx < self.lex_modes.len() {
1919 self.lex_modes[idx]
1920 } else {
1921 LexMode {
1922 lex_state: 0,
1923 external_lex_state: 0,
1924 }
1925 }
1926 }
1927
1928 #[inline]
1930 pub fn is_extra(&self, sym: SymbolId) -> bool {
1931 self.extras.contains(&sym)
1932 }
1933
1934 #[must_use = "validation result must be checked"]
1942 pub fn validate(&self) -> Result<(), TableError> {
1943 let terminal_boundary = self.token_count + self.external_token_count;
1944
1945 debug_assert_ne!(
1946 self.eof_symbol,
1947 parse_forest::ERROR_SYMBOL,
1948 "EOF symbol cannot be the ERROR sentinel"
1949 );
1950
1951 if self.eof_symbol == parse_forest::ERROR_SYMBOL {
1952 return Err(TableError::EofIsError);
1953 }
1954
1955 if (self.eof_symbol.0 as usize) < terminal_boundary {
1957 return Err(TableError::EofNotSentinel {
1958 eof: self.eof_symbol.0,
1959 token_count: self.token_count as u32,
1960 external_count: self.external_token_count as u32,
1961 });
1962 }
1963
1964 if !self.symbol_to_index.contains_key(&self.eof_symbol) {
1966 return Err(TableError::EofMissingFromIndex);
1967 }
1968
1969 let tb = self.terminal_boundary();
1971
1972 debug_assert!(
1974 self.extras
1975 .iter()
1976 .all(|&sym| (sym.0 as usize) < self.token_count),
1977 "all extras must be within [0..token_count)"
1978 );
1979
1980 for sym_id in 0..self.token_count {
1982 let sym = SymbolId(sym_id as u16);
1983 debug_assert!(self.is_terminal(sym), "0..token_count are terminals");
1984 debug_assert!(
1986 (sym.0 as usize) < self.token_count,
1987 "regular terminals are in [0..token_count)"
1988 );
1989 }
1990
1991 for sym_id in self.token_count..tb {
1993 let sym = SymbolId(sym_id as u16);
1994 debug_assert!(self.is_terminal(sym), "external tokens are terminals");
1995 debug_assert!(
1997 (sym.0 as usize) >= self.token_count && (sym.0 as usize) < tb,
1998 "external tokens are in [token_count..terminal_boundary)"
1999 );
2000 }
2001
2002 debug_assert!(
2003 self.symbol_to_index.contains_key(&self.eof_symbol),
2004 "EOF must exist in ACTION map"
2005 );
2006
2007 if terminal_boundary > 0 {
2010 let end_symbol = SymbolId((terminal_boundary - 1) as u16);
2011 if let (Some(&eof_col), Some(&end_col)) = (
2012 self.symbol_to_index.get(&self.eof_symbol),
2013 self.symbol_to_index.get(&end_symbol),
2014 ) {
2015 for (state_idx, row) in self.action_table.iter().enumerate() {
2017 if eof_col < row.len() && end_col < row.len() {
2018 let eof_actions = &row[eof_col];
2019 let end_actions = &row[end_col];
2020
2021 if eof_actions.is_empty() != end_actions.is_empty() {
2024 return Err(TableError::EofParityMismatch(state_idx as u16));
2025 }
2026 }
2027 }
2028 }
2029 }
2030
2031 Ok(())
2032 }
2033
2034 pub fn remap_goto_to_direct_symbol_id(mut self) -> Self {
2037 if matches!(self.goto_indexing, GotoIndexing::DirectSymbolId) {
2038 return self;
2039 }
2040 let max_sym = self
2042 .nonterminal_to_index
2043 .keys()
2044 .map(|s| s.0 as usize)
2045 .max()
2046 .unwrap_or(0);
2047 let new_width = max_sym + 1;
2048
2049 for row in &mut self.goto_table {
2050 debug_assert!(
2052 self.nonterminal_to_index.values().all(|&c| c < row.len()),
2053 "nonterminal_to_index contains a column >= row width"
2054 );
2055
2056 let mut new_row = vec![StateId(0); new_width];
2057 for (sym, &old_col) in &self.nonterminal_to_index {
2059 if old_col < row.len() {
2060 new_row[sym.0 as usize] = row[old_col];
2061 }
2062 }
2063 *row = new_row;
2064 }
2065 self.goto_indexing = GotoIndexing::DirectSymbolId;
2066 self
2067 }
2068
2069 pub fn remap_goto_to_nonterminal_map(mut self) -> Self {
2072 if matches!(self.goto_indexing, GotoIndexing::NonterminalMap) {
2073 return self;
2074 }
2075 let width = self
2077 .nonterminal_to_index
2078 .values()
2079 .copied()
2080 .max()
2081 .unwrap_or(0)
2082 + 1;
2083 for row in &mut self.goto_table {
2084 debug_assert!(
2086 self.nonterminal_to_index
2087 .keys()
2088 .all(|s| (s.0 as usize) < row.len()),
2089 "nonterminal_to_index contains a symbol id >= row width"
2090 );
2091
2092 let mut new_row = vec![StateId(0); width];
2093 for (sym, &col) in &self.nonterminal_to_index {
2094 let src = sym.0 as usize;
2095 if src < row.len() && col < new_row.len() {
2096 new_row[col] = row[src];
2097 }
2098 }
2099 *row = new_row;
2100 }
2101 self.goto_indexing = GotoIndexing::NonterminalMap;
2102 self
2103 }
2104}
2105
2106#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
2108#[non_exhaustive]
2109#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
2110pub enum Action {
2111 Shift(StateId),
2113 Reduce(RuleId),
2115 Accept,
2117 Error,
2119 Recover,
2121 Fork(Vec<Action>),
2123}
2124
2125pub type ActionCell = Vec<Action>;
2127
2128#[derive(Debug, Clone)]
2130#[cfg_attr(feature = "serialization", derive(Serialize, Deserialize))]
2131#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
2132pub struct SymbolMetadata {
2133 pub name: String,
2135 pub is_visible: bool,
2137 pub is_named: bool,
2139 pub is_supertype: bool,
2141 pub is_terminal: bool,
2144 pub is_extra: bool,
2146 pub is_fragile: bool,
2148 pub symbol_id: SymbolId,
2150}
2151
2152#[derive(Debug, Clone)]
2154#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
2155pub struct ConflictResolver {
2156 pub conflicts: Vec<Conflict>,
2158}
2159
2160#[derive(Debug, Clone)]
2162#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
2163pub struct Conflict {
2164 pub state: StateId,
2166 pub symbol: SymbolId,
2168 pub actions: Vec<Action>,
2170 pub conflict_type: ConflictType,
2172}
2173
2174#[derive(Debug, Clone, PartialEq, Eq)]
2176#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
2177pub enum ConflictType {
2178 ShiftReduce,
2180 ReduceReduce,
2182}
2183
2184impl ConflictResolver {
2185 pub fn detect_conflicts(
2213 item_sets: &ItemSetCollection,
2214 grammar: &Grammar,
2215 _first_follow: &FirstFollowSets,
2216 ) -> Self {
2217 let mut conflicts = Vec::new();
2218
2219 for item_set in &item_sets.sets {
2220 let mut actions_by_symbol: IndexMap<SymbolId, Vec<Action>> = IndexMap::new();
2221
2222 for item in &item_set.items {
2224 if item.is_reduce_item(grammar) {
2225 let mut is_accept = false;
2227
2228 if let Some(start_symbol) = grammar.start_symbol() {
2230 for rule in grammar.all_rules() {
2232 if rule.production_id.0 == item.rule_id.0 {
2233 is_accept =
2235 rule.lhs == start_symbol && item.lookahead == SymbolId(0);
2236 break;
2237 }
2238 }
2239 }
2240
2241 let action = if is_accept {
2242 Action::Accept
2243 } else {
2244 Action::Reduce(item.rule_id)
2245 };
2246
2247 actions_by_symbol
2248 .entry(item.lookahead)
2249 .or_default()
2250 .push(action);
2251 } else if let Some(symbol) = item.next_symbol(grammar) {
2252 let symbol_id = match symbol {
2254 Symbol::Terminal(id) | Symbol::NonTerminal(id) | Symbol::External(id) => {
2255 *id
2256 }
2257 Symbol::Optional(_)
2258 | Symbol::Repeat(_)
2259 | Symbol::RepeatOne(_)
2260 | Symbol::Choice(_)
2261 | Symbol::Sequence(_)
2262 | Symbol::Epsilon => {
2263 panic!(
2264 "Complex symbols should be normalized before LR item generation"
2265 );
2266 }
2267 };
2268
2269 if let Some(target_state) = item_sets.goto_table.get(&(item_set.id, symbol_id))
2270 {
2271 let action = Action::Shift(*target_state);
2272 actions_by_symbol.entry(symbol_id).or_default().push(action);
2273 }
2274 }
2275 }
2276
2277 for (symbol_id, actions) in actions_by_symbol {
2279 if actions.len() > 1 {
2280 let conflict_type = if actions.iter().any(|a| matches!(a, Action::Shift(_)))
2281 && actions.iter().any(|a| matches!(a, Action::Reduce(_)))
2282 {
2283 ConflictType::ShiftReduce
2284 } else {
2285 ConflictType::ReduceReduce
2286 };
2287
2288 conflicts.push(Conflict {
2289 state: item_set.id,
2290 symbol: symbol_id,
2291 actions,
2292 conflict_type,
2293 });
2294 }
2295 }
2296 }
2297
2298 Self { conflicts }
2299 }
2300
2301 pub fn resolve_conflicts(&mut self, grammar: &Grammar) {
2303 let mut conflicts_to_resolve = self.conflicts.clone();
2305 for conflict in &mut conflicts_to_resolve {
2306 self.resolve_single_conflict(conflict, grammar);
2308 }
2309 self.conflicts = conflicts_to_resolve;
2310 }
2311
2312 fn resolve_single_conflict(&self, conflict: &mut Conflict, grammar: &Grammar) {
2313 match conflict.conflict_type {
2317 ConflictType::ShiftReduce => {
2318 self.resolve_shift_reduce_conflict(conflict, grammar);
2321 }
2322 ConflictType::ReduceReduce => {
2323 self.resolve_reduce_reduce_conflict(conflict, grammar);
2326 }
2327 }
2328 }
2329
2330 fn resolve_shift_reduce_conflict(&self, conflict: &mut Conflict, grammar: &Grammar) {
2331 let precedence_resolver = StaticPrecedenceResolver::from_grammar(grammar);
2333
2334 let mut shift_action = None;
2335 let mut reduce_action = None;
2336
2337 for action in &conflict.actions {
2339 match action {
2340 Action::Shift(_) => shift_action = Some(action.clone()),
2341 Action::Reduce(_) => reduce_action = Some(action.clone()),
2342 _ => {}
2343 }
2344 }
2345
2346 match (shift_action, reduce_action) {
2347 (Some(shift), Some(reduce)) => {
2348 let shift_prec = precedence_resolver.token_precedence(conflict.symbol);
2350
2351 let reduce_prec = if let Action::Reduce(rule_id) = &reduce {
2353 precedence_resolver.rule_precedence(*rule_id)
2354 } else {
2355 None
2356 };
2357
2358 match compare_precedences(shift_prec, reduce_prec) {
2363 PrecedenceComparison::PreferShift => {
2364 conflict.actions = vec![shift];
2366 }
2367 PrecedenceComparison::PreferReduce => {
2368 conflict.actions = vec![reduce];
2370 }
2371 PrecedenceComparison::Error => {
2372 conflict.actions = vec![Action::Fork(vec![shift, reduce])];
2375 }
2376 PrecedenceComparison::None => {
2377 conflict.actions = vec![Action::Fork(vec![shift, reduce])];
2379 }
2380 }
2381 }
2382 _ => {
2383 }
2386 }
2387 }
2388
2389 fn resolve_reduce_reduce_conflict(&self, conflict: &mut Conflict, _grammar: &Grammar) {
2390 let mut best_action = None;
2394 let mut best_rule_id = u16::MAX;
2395
2396 for action in &conflict.actions {
2397 if let Action::Reduce(rule_id) = action
2398 && rule_id.0 < best_rule_id
2399 {
2400 best_rule_id = rule_id.0;
2401 best_action = Some(action.clone());
2402 }
2403 }
2404
2405 if let Some(action) = best_action {
2406 conflict.actions = vec![action];
2407 }
2408 }
2409}
2410
2411#[derive(Debug, thiserror::Error)]
2413#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
2414pub enum GLRError {
2415 #[error("Grammar error: {0}")]
2417 GrammarError(#[from] GrammarError),
2418
2419 #[error("Conflict resolution failed: {0}")]
2421 ConflictResolution(String),
2422
2423 #[error("State machine generation failed: {0}")]
2425 StateMachine(String),
2426
2427 #[error("Table validation failed: {0}")]
2429 TableValidation(TableError),
2430
2431 #[error("Complex symbols must be normalized before {operation}")]
2433 ComplexSymbolsNotNormalized { operation: String },
2434
2435 #[error("Expected {expected} symbol, found complex symbol")]
2437 ExpectedSimpleSymbol { expected: String },
2438
2439 #[error("Invalid symbol state during {operation}")]
2441 InvalidSymbolState { operation: String },
2442}
2443
2444#[derive(Debug, thiserror::Error)]
2446#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
2447pub enum TableError {
2448 #[error("EOF symbol collides with ERROR")]
2450 EofIsError,
2451
2452 #[error(
2454 "EOF symbol must be >= token_count + external_token_count (EOF: {eof}, tokens: {token_count}, externals: {external_count})"
2455 )]
2456 EofNotSentinel {
2457 eof: u16,
2458 token_count: u32,
2459 external_count: u32,
2460 },
2461
2462 #[error("EOF not present in symbol_to_index")]
2464 EofMissingFromIndex,
2465
2466 #[error("EOF column parity mismatch at state {0}")]
2468 EofParityMismatch(u16),
2469}
2470
2471#[allow(dead_code)]
2473fn can_derive_start(grammar: &Grammar, symbol: SymbolId, start: SymbolId) -> bool {
2474 if symbol == start {
2475 return true;
2476 }
2477
2478 if let Some(rules) = grammar.get_rules_for_symbol(symbol) {
2480 for rule in rules {
2481 if rule.rhs.len() == 1
2482 && let Symbol::NonTerminal(target) = &rule.rhs[0]
2483 && *target == start
2484 {
2485 return true;
2486 }
2487 }
2488 }
2489
2490 false
2491}
2492
2493fn normalize_action_table(action_table: &mut Vec<Vec<ActionCell>>) {
2496 for row in action_table.iter_mut() {
2497 for cell in row.iter_mut() {
2498 normalize_action_cell(cell);
2499 }
2500 }
2501}
2502
2503fn normalize_action_cell(cell: &mut ActionCell) {
2505 for action in cell.iter_mut() {
2506 normalize_action(action);
2507 }
2508 cell.sort_by_key(action_sort_key);
2509 cell.dedup();
2510}
2511
2512fn normalize_action(action: &mut Action) {
2514 if let Action::Fork(inner) = action {
2515 for inner_action in inner.iter_mut() {
2516 normalize_action(inner_action);
2517 }
2518 inner.sort_by_key(action_sort_key);
2519 inner.dedup();
2520 }
2521}
2522
2523fn action_sort_key(action: &Action) -> (u8, u16, u16, u16) {
2525 match action {
2526 Action::Shift(s) => (0, s.0, 0, 0),
2527 Action::Reduce(r) => (1, r.0, 0, 0),
2528 Action::Accept => (2, 0, 0, 0),
2529 Action::Error => (3, 0, 0, 0),
2530 Action::Recover => (4, 0, 0, 0),
2531 Action::Fork(inner) => {
2532 let first = inner.first().map(action_sort_key).unwrap_or((0, 0, 0, 0));
2534 (5, first.1, first.2, inner.len() as u16)
2535 }
2536 }
2537}
2538
2539pub fn build_lr1_automaton(
2573 grammar: &Grammar,
2574 first_follow: &FirstFollowSets,
2575) -> Result<ParseTable, GLRError> {
2576 let mut rule_count = 0;
2578 for rule in grammar.all_rules() {
2579 if rule_count >= 10 {
2580 break;
2581 }
2582 let mut rhs_str = String::new();
2583 for sym in &rule.rhs {
2584 match sym {
2585 Symbol::Terminal(id) => rhs_str.push_str(&format!("T({}) ", id.0)),
2586 Symbol::NonTerminal(id) => rhs_str.push_str(&format!("NT({}) ", id.0)),
2587 _ => rhs_str.push_str("? "),
2588 }
2589 }
2590 rule_count += 1;
2591 }
2592
2593 let nonterminal_symbols: BTreeSet<SymbolId> = grammar.rules.keys().copied().collect();
2598 let external_symbols: BTreeSet<SymbolId> =
2599 grammar.externals.iter().map(|e| e.symbol_id).collect();
2600 let mut rhs_terminals: BTreeSet<SymbolId> = BTreeSet::new();
2601 for rule in grammar.all_rules() {
2602 for sym in &rule.rhs {
2603 if let Symbol::Terminal(id) = sym {
2604 rhs_terminals.insert(*id);
2605 }
2606 }
2607 }
2608
2609 let max_symbol = grammar
2612 .tokens
2613 .keys()
2614 .chain(grammar.rule_names.keys())
2615 .chain(nonterminal_symbols.iter())
2616 .chain(external_symbols.iter())
2617 .chain(rhs_terminals.iter())
2618 .map(|s| s.0)
2619 .max()
2620 .unwrap_or(0);
2621 let eof_symbol = SymbolId(max_symbol.checked_add(1).ok_or_else(|| {
2622 GLRError::StateMachine("EOF symbol would overflow u16: grammar has too many symbols".into())
2623 })?);
2624
2625 let mut augmented_grammar = grammar.clone();
2627
2628 let original_start =
2630 grammar
2631 .start_symbol()
2632 .ok_or(GLRError::GrammarError(GrammarError::UnresolvedSymbol(
2633 SymbolId(0),
2634 )))?;
2635
2636 if let Some(_name) = grammar.rule_names.get(&original_start) {}
2637
2638 let augmented_start_id = max_symbol.checked_add(2).ok_or_else(|| {
2641 GLRError::StateMachine(
2642 "Augmented start symbol would overflow u16: grammar has too many symbols".into(),
2643 )
2644 })?;
2645 let augmented_start = SymbolId(augmented_start_id);
2646
2647 let max_production_id = grammar
2649 .all_rules()
2650 .map(|r| r.production_id.0)
2651 .max()
2652 .unwrap_or(0);
2653 let augmented_production_id = max_production_id
2654 .checked_add(1)
2655 .ok_or_else(|| GLRError::StateMachine("Production ID overflow".into()))?;
2656
2657 let augmented_rule = Rule {
2659 lhs: augmented_start,
2660 rhs: vec![Symbol::NonTerminal(original_start)],
2661 precedence: None,
2662 associativity: None,
2663 fields: vec![],
2664 production_id: ProductionId(augmented_production_id),
2665 };
2666 augmented_grammar
2667 .rules
2668 .insert(augmented_start, vec![augmented_rule]);
2669 augmented_grammar
2670 .rule_names
2671 .insert(augmented_start, "$start".to_string());
2672
2673 let collection = ItemSetCollection::build_canonical_collection_augmented(
2675 &augmented_grammar,
2676 first_follow,
2677 augmented_start,
2678 original_start,
2679 eof_symbol,
2680 );
2681
2682 let mut symbol_to_index = BTreeMap::new();
2684
2685 symbol_to_index.insert(eof_symbol, 0);
2687
2688 let mut internal_terminals: BTreeSet<SymbolId> = grammar.tokens.keys().copied().collect();
2697 internal_terminals.extend(rhs_terminals.iter().copied());
2698 internal_terminals.remove(&eof_symbol);
2699 for id in &external_symbols {
2700 internal_terminals.remove(id);
2701 }
2702 for id in &nonterminal_symbols {
2703 internal_terminals.remove(id);
2704 }
2705
2706 let mut internal_tokens: Vec<SymbolId> = internal_terminals.into_iter().collect();
2707 internal_tokens.sort_by_key(|s| s.0);
2708 for &id in &internal_tokens {
2709 if !symbol_to_index.contains_key(&id) {
2710 let idx = symbol_to_index.len();
2711 symbol_to_index.insert(id, idx);
2712 }
2713 }
2714
2715 let mut ext_tokens: Vec<SymbolId> = external_symbols.iter().copied().collect();
2717 ext_tokens.sort_by_key(|s| s.0);
2718 for &id in &ext_tokens {
2719 if !symbol_to_index.contains_key(&id) {
2720 let idx = symbol_to_index.len();
2721 symbol_to_index.insert(id, idx);
2722 }
2723 }
2724
2725 let mut non_terminals: Vec<SymbolId> = nonterminal_symbols.iter().copied().collect();
2727 non_terminals.sort_by_key(|s| s.0);
2728 for id in non_terminals {
2729 if !symbol_to_index.contains_key(&id) {
2730 let idx = symbol_to_index.len();
2731 symbol_to_index.insert(id, idx);
2732 }
2733 }
2734
2735 let mut other_symbols: Vec<SymbolId> = grammar
2737 .rule_names
2738 .keys()
2739 .cloned()
2740 .filter(|id| !symbol_to_index.contains_key(id))
2741 .collect();
2742 other_symbols.sort_by_key(|s| s.0);
2743 if !other_symbols.is_empty() {
2744 return Err(GLRError::StateMachine(format!(
2745 "Unexpected symbols outside terminal/nonterminal partitions: {:?}",
2746 other_symbols
2747 )));
2748 }
2749
2750 let indexed_symbol_count = symbol_to_index.len();
2752
2753 let state_count = collection.sets.len();
2755 let symbol_count = indexed_symbol_count; let mut action_table = vec![vec![Vec::new(); indexed_symbol_count]; state_count];
2758 let mut goto_table = vec![vec![StateId(0); indexed_symbol_count]; state_count];
2759
2760 let mut conflicts_by_state: BTreeMap<(usize, usize), Vec<Action>> = BTreeMap::new();
2762
2763 let mut rules = Vec::new();
2765 let mut dynamic_prec_by_rule = Vec::new();
2766 let mut rule_assoc_by_rule = Vec::new();
2767 let mut production_to_rule_id = BTreeMap::new();
2768
2769 for (rule_id, rule) in grammar.all_rules().enumerate() {
2770 production_to_rule_id.insert(rule.production_id.0, rule_id as u16);
2771 rules.push(ParseRule {
2772 lhs: rule.lhs,
2773 rhs_len: rule.rhs.len() as u16,
2774 });
2775
2776 let prec = match rule.precedence {
2778 Some(adze_ir::PrecedenceKind::Static(p)) => p,
2779 Some(adze_ir::PrecedenceKind::Dynamic(p)) => p,
2780 None => 0,
2781 };
2782 dynamic_prec_by_rule.push(prec);
2783
2784 let assoc = match rule.associativity {
2786 Some(adze_ir::Associativity::Left) => 1,
2787 Some(adze_ir::Associativity::Right) => -1,
2788 _ => 0,
2789 };
2790 rule_assoc_by_rule.push(assoc);
2791 }
2792
2793 debug_trace!(
2795 "DEBUG: Collection goto table has {} entries",
2796 collection.goto_table.len()
2797 );
2798 debug_trace!(
2799 "DEBUG: Augmented grammar has {} tokens",
2800 augmented_grammar.tokens.len()
2801 );
2802
2803 debug_trace!("=== Symbol Classification Debug ===");
2805 debug_trace!(
2806 "Tokens in augmented_grammar: {:?}",
2807 augmented_grammar
2808 .tokens
2809 .keys()
2810 .map(|k| k.0)
2811 .collect::<Vec<_>>()
2812 );
2813 debug_trace!(
2814 "Externals in augmented_grammar: {:?}",
2815 augmented_grammar
2816 .externals
2817 .iter()
2818 .map(|e| e.symbol_id.0)
2819 .collect::<Vec<_>>()
2820 );
2821 debug_trace!("Original grammar tokens: {}", grammar.tokens.len());
2822 debug_trace!(
2823 "Collection goto_table size: {}",
2824 collection.goto_table.len()
2825 );
2826
2827 let state0_gotos: Vec<_> = collection
2829 .goto_table
2830 .iter()
2831 .filter(|((from, _), _)| from.0 == 0)
2832 .collect();
2833 debug_trace!("State 0 has {} goto entries", state0_gotos.len());
2834 for ((_, _symbol), _to_state) in &state0_gotos {
2835 debug_trace!(" Symbol {} -> State {}", _symbol.0, _to_state.0);
2836 }
2837
2838 let mut _terminal_count = 0;
2841 let mut _non_terminal_count = 0;
2842
2843 for ((from_state, symbol), to_state) in &collection.goto_table {
2844 let is_terminal = collection
2846 .symbol_is_terminal
2847 .get(symbol)
2848 .copied()
2849 .unwrap_or(*symbol == eof_symbol); if from_state.0 == 0 {
2852 debug_trace!(
2853 "State 0 goto entry: symbol {} -> state {}, is_terminal={} (in tokens={}, in externals={}, is EOF={})",
2854 symbol.0,
2855 to_state.0,
2856 is_terminal,
2857 augmented_grammar.tokens.contains_key(symbol),
2858 augmented_grammar
2859 .externals
2860 .iter()
2861 .any(|e| e.symbol_id == *symbol),
2862 symbol.0 == 0
2863 );
2864 }
2865
2866 if is_terminal {
2867 _terminal_count += 1;
2868 if let Some(&symbol_idx) = symbol_to_index.get(symbol) {
2869 let state_idx = from_state.0 as usize;
2870 if state_idx < action_table.len() && symbol_idx < action_table[state_idx].len() {
2871 let new_action = Action::Shift(*to_state);
2873 if state_idx == 0 {
2874 debug_trace!(
2875 "DEBUG: Adding shift action to state 0: symbol {} (idx={}) -> state {}",
2876 symbol.0,
2877 symbol_idx,
2878 to_state.0
2879 );
2880 }
2881 add_action_with_conflict(
2882 &mut action_table,
2883 &mut conflicts_by_state,
2884 state_idx,
2885 symbol_idx,
2886 new_action,
2887 );
2888 } else if state_idx == 0 {
2889 debug_trace!(
2890 "DEBUG: SKIPPING shift for state 0: bounds check failed - state_idx={}, symbol_idx={}, action_table.len={}, inner_len={}",
2891 state_idx,
2892 symbol_idx,
2893 action_table.len(),
2894 if state_idx < action_table.len() {
2895 action_table[state_idx].len()
2896 } else {
2897 0
2898 }
2899 );
2900 }
2901 } else if from_state.0 == 0 {
2902 debug_trace!(
2903 "DEBUG: Terminal {} not in symbol_to_index for state 0",
2904 symbol.0
2905 );
2906 }
2907 } else {
2908 _non_terminal_count += 1;
2909 }
2910 }
2911
2912 for state_idx in 0..state_count {
2917 for extra_symbol_id in &augmented_grammar.extras {
2918 if let Some(&symbol_idx) = symbol_to_index.get(extra_symbol_id) {
2919 if action_table[state_idx][symbol_idx].is_empty() {
2922 action_table[state_idx][symbol_idx]
2924 .push(Action::Shift(StateId(state_idx as u16)));
2925 }
2926 }
2927 }
2928 }
2929
2930 for item_set in &collection.sets {
2932 let state_idx = item_set.id.0 as usize;
2933
2934 for item in &item_set.items {
2935 if item.is_reduce_item(&augmented_grammar) {
2936 if let Some(rule) = augmented_grammar
2938 .all_rules()
2939 .find(|r| r.production_id.0 == item.rule_id.0)
2940 && rule.lhs == augmented_start
2941 {
2942 if item.lookahead == eof_symbol {
2943 if let Some(&eof_idx) = symbol_to_index.get(&eof_symbol) {
2945 add_action_with_conflict(
2946 &mut action_table,
2947 &mut conflicts_by_state,
2948 state_idx,
2949 eof_idx,
2950 Action::Accept,
2951 );
2952 }
2953 }
2954 continue;
2956 }
2957
2958 if let Some(&rule_id) = production_to_rule_id.get(&item.rule_id.0) {
2960 let rule = &rules[rule_id as usize];
2961 let is_empty_production = rule.rhs_len == 0;
2962
2963 let lookaheads_to_check: Vec<SymbolId> = if is_empty_production {
2965 if let Some(follow_set) = first_follow.follow(rule.lhs) {
2967 follow_set
2970 .ones()
2971 .map(|idx| map_follow_symbol(SymbolId(idx as u16), eof_symbol))
2972 .collect()
2973 } else {
2974 vec![item.lookahead]
2975 }
2976 } else {
2977 vec![item.lookahead]
2978 };
2979
2980 for lookahead in lookaheads_to_check {
2981 if let Some(&lookahead_idx) = symbol_to_index.get(&lookahead) {
2982 let new_action = Action::Reduce(RuleId(rule_id));
2983
2984 add_action_with_conflict(
2986 &mut action_table,
2987 &mut conflicts_by_state,
2988 state_idx,
2989 lookahead_idx,
2990 new_action,
2991 );
2992 }
2993 }
2994 }
2995 }
2996 }
2998 }
2999
3000 let production_count = augmented_grammar.all_rules().count() as u32;
3004 let token_count = (internal_tokens.len() + 1) as u32;
3006 let prec_tables = build_prec_tables(
3007 &augmented_grammar,
3008 &symbol_to_index,
3009 token_count,
3010 production_count,
3011 );
3012
3013 let first_nonterminal_idx = internal_tokens.len() + ext_tokens.len() + 1;
3017
3018 for ((state_idx, symbol_idx), _actions) in conflicts_by_state {
3020 debug_assert!(state_idx < action_table.len(), "state_idx out of bounds");
3022 debug_assert!(
3023 symbol_idx < action_table[0].len(),
3024 "symbol_idx out of bounds"
3025 );
3026
3027 if symbol_idx >= first_nonterminal_idx {
3030 continue; }
3032
3033 let cell = &mut action_table[state_idx][symbol_idx];
3034
3035 if cell.is_empty() {
3037 continue;
3038 }
3039
3040 if cell.iter().any(|a| matches!(a, Action::Accept)) {
3042 *cell = vec![Action::Accept];
3043 continue;
3044 }
3045
3046 let first_shift = cell.iter().find_map(|a| {
3048 if let Action::Shift(s) = a {
3049 Some(*s)
3050 } else {
3051 None
3052 }
3053 });
3054 let mut reduces: Vec<u16> = cell
3055 .iter()
3056 .filter_map(|a| {
3057 if let Action::Reduce(pid) = a {
3058 Some(pid.0)
3059 } else {
3060 None
3061 }
3062 })
3063 .collect();
3064
3065 if reduces.len() > 1 {
3067 let winner = reduces[1..].iter().fold(reduces[0], |acc, &r| {
3068 decide_reduce_reduce(acc, r, &prec_tables)
3069 });
3070 reduces.clear();
3071 reduces.push(winner);
3072 cell.retain(|a| {
3074 matches!(a, Action::Shift(_)) || matches!(a, Action::Reduce(pid) if pid.0 == winner)
3075 });
3076 }
3077
3078 if let (Some(s), Some(r)) = (first_shift, reduces.first().copied()) {
3080 match decide_with_precedence(symbol_idx, r, &prec_tables) {
3081 PrecDecision::PreferShift => *cell = vec![Action::Shift(s)],
3082 PrecDecision::PreferReduce => *cell = vec![Action::Reduce(RuleId(r))],
3083 PrecDecision::Error => {
3084 }
3090 PrecDecision::NoInfo => {
3091 }
3095 }
3096 }
3097 }
3098
3099 for ((from_state, symbol), _to_state) in &collection.goto_table {
3101 let is_terminal = collection
3103 .symbol_is_terminal
3104 .get(symbol)
3105 .copied()
3106 .unwrap_or(*symbol == eof_symbol); if !is_terminal && let Some(&symbol_idx) = symbol_to_index.get(symbol) {
3109 let state_idx = from_state.0 as usize;
3110 if state_idx < goto_table.len() && symbol_idx < goto_table[state_idx].len() {
3111 }
3113 }
3114 }
3115
3116 for ((from_state, symbol), to_state) in &collection.goto_table {
3118 let from_idx = from_state.0 as usize;
3119 if let Some(&symbol_idx) = symbol_to_index.get(symbol) {
3120 goto_table[from_idx][symbol_idx] = *to_state;
3121 }
3122 }
3123
3124 if let Some(_start_symbol) = grammar.start_symbol() {
3129 for (state_idx, _item_set) in collection.sets.iter().enumerate() {
3131 let needs_eof_reduce = false;
3133 let reduce_rule_id: Option<RuleId> = None;
3134
3135 if needs_eof_reduce
3137 && let Some(rule_id) = reduce_rule_id
3138 && let Some(&eof_idx) = symbol_to_index.get(&SymbolId(0))
3139 {
3140 if action_table[state_idx][eof_idx].is_empty() {
3142 action_table[state_idx][eof_idx].push(Action::Reduce(rule_id));
3143 }
3144 }
3145 }
3146 }
3147
3148 let mut symbol_metadata = Vec::new();
3150
3151 for (symbol_id, token) in &grammar.tokens {
3153 symbol_metadata.push(SymbolMetadata {
3154 name: token.name.clone(),
3155 is_visible: !token.name.starts_with('_'),
3156 is_named: !matches!(&token.pattern, TokenPattern::String(_)),
3157 is_supertype: false,
3158 is_terminal: true,
3160 is_extra: grammar.extras.contains(symbol_id),
3161 is_fragile: false, symbol_id: *symbol_id,
3163 });
3164 }
3165
3166 for symbol_id in grammar.rules.keys() {
3168 let is_supertype = grammar.supertypes.contains(symbol_id);
3169 symbol_metadata.push(SymbolMetadata {
3170 name: format!("rule_{}", symbol_id.0),
3171 is_visible: true,
3172 is_named: true,
3173 is_supertype,
3174 is_terminal: false,
3176 is_extra: false, is_fragile: false, symbol_id: *symbol_id,
3179 });
3180 }
3181
3182 for external in &grammar.externals {
3184 symbol_metadata.push(SymbolMetadata {
3185 name: external.name.clone(),
3186 is_visible: !external.name.starts_with('_'),
3187 is_named: true,
3188 is_supertype: false,
3189 is_terminal: true, is_extra: false, is_fragile: false, symbol_id: external.symbol_id, });
3195 }
3196
3197 let mut external_scanner_states =
3201 vec![vec![false; augmented_grammar.externals.len()]; state_count];
3202
3203 let mut external_symbol_to_idx = BTreeMap::new();
3205 for (idx, external) in augmented_grammar.externals.iter().enumerate() {
3206 external_symbol_to_idx.insert(external.symbol_id, idx);
3207 }
3208
3209 for state_idx in 0..state_count {
3212 for (external_idx, external) in augmented_grammar.externals.iter().enumerate() {
3213 if let Some(&symbol_idx) = symbol_to_index.get(&external.symbol_id) {
3215 if action_table[state_idx][symbol_idx]
3217 .iter()
3218 .any(|a| matches!(a, Action::Shift(_)))
3219 {
3220 external_scanner_states[state_idx][external_idx] = true;
3221 }
3222 }
3223 }
3224 }
3225
3226 let mut nonterminal_to_index = BTreeMap::new();
3228 for (&symbol_id, &idx) in &symbol_to_index {
3229 if nonterminal_symbols.contains(&symbol_id) {
3230 nonterminal_to_index.insert(symbol_id, idx);
3231 }
3232 }
3233
3234 let _rule_count = rules.len();
3235
3236 let token_count = internal_tokens.len() + 1;
3239 let external_token_count = ext_tokens.len();
3240
3241 normalize_action_table(&mut action_table);
3243
3244 let mut index_to_symbol = vec![SymbolId(u16::MAX); symbol_to_index.len()];
3246 for (sym, &idx) in &symbol_to_index {
3247 index_to_symbol[idx] = *sym;
3248 }
3249
3250 let mut table = ParseTable {
3251 action_table,
3252 goto_table,
3253 symbol_metadata,
3254 state_count,
3255 symbol_count,
3256 symbol_to_index,
3257 index_to_symbol,
3258 external_scanner_states,
3259 rules,
3260 nonterminal_to_index,
3261 goto_indexing: GotoIndexing::NonterminalMap, eof_symbol,
3263 start_symbol: original_start,
3264 grammar: grammar.clone(),
3265 initial_state: StateId(0), token_count,
3267 external_token_count,
3268 lex_modes: vec![
3269 LexMode {
3270 lex_state: 0,
3271 external_lex_state: 0
3272 };
3273 state_count
3274 ],
3275 extras: vec![], dynamic_prec_by_rule, rule_assoc_by_rule, alias_sequences: vec![], field_names: vec![], field_map: BTreeMap::new(), };
3282
3283 table.detect_goto_indexing();
3285
3286 Ok(table)
3287}
3288
3289#[must_use = "validation result must be checked"]
3291pub fn sanity_check_tables(pt: &ParseTable) -> Result<(), String> {
3292 let eof_col = pt
3294 .symbol_to_index
3295 .get(&pt.eof_symbol)
3296 .ok_or_else(|| format!("EOF symbol {} not in symbol_to_index", pt.eof_symbol.0))?;
3297
3298 let accept_somewhere = pt.action_table.iter().any(|row| {
3299 row.get(*eof_col)
3300 .and_then(|cell| cell.iter().find(|a| matches!(a, Action::Accept)))
3301 .is_some()
3302 });
3303 if !accept_somewhere {
3304 return Err("No ACCEPT on EOF found in action table".to_string());
3305 }
3306
3307 for pid in 0..pt.rules.len() {
3309 let lhs = pt.rules[pid].lhs;
3310 let lhs_idx = pt
3311 .symbol_to_index
3312 .get(&lhs)
3313 .ok_or_else(|| format!("LHS symbol {} not in symbol_to_index", lhs.0))?;
3314
3315 if *lhs_idx < pt.token_count {
3317 return Err(format!(
3318 "LHS must be a non-terminal column (pid={}, lhs_idx={}, token_count={})",
3319 pid, lhs_idx, pt.token_count
3320 ));
3321 }
3322
3323 let any = pt
3324 .goto_table
3325 .iter()
3326 .any(|row| row.get(*lhs_idx).is_some_and(|s| s.0 != 0));
3327 if !any {
3328 return Err(format!("No goto(state, lhs(pid={})) present", pid));
3329 }
3330 }
3331
3332 for (sym, &idx) in &pt.symbol_to_index {
3334 if idx >= pt.index_to_symbol.len() {
3335 return Err(format!(
3336 "symbol_to_index has index {} but index_to_symbol has length {}",
3337 idx,
3338 pt.index_to_symbol.len()
3339 ));
3340 }
3341 if pt.index_to_symbol[idx] != *sym {
3342 return Err(format!(
3343 "index_to_symbol[{}] = {} but should be {}",
3344 idx, pt.index_to_symbol[idx].0, sym.0
3345 ));
3346 }
3347 }
3348
3349 Ok(())
3350}
3351
3352fn add_action_with_conflict(
3354 action_table: &mut Vec<Vec<ActionCell>>,
3355 conflicts_by_state: &mut BTreeMap<(usize, usize), Vec<Action>>,
3356 state_idx: usize,
3357 symbol_idx: usize,
3358 new_action: Action,
3359) {
3360 if state_idx >= action_table.len() || symbol_idx >= action_table[0].len() {
3362 panic!(
3363 "Index out of bounds in add_action_with_conflict: state_idx={}, symbol_idx={}, table_size={}x{}",
3364 state_idx,
3365 symbol_idx,
3366 action_table.len(),
3367 if action_table.is_empty() {
3368 0
3369 } else {
3370 action_table[0].len()
3371 }
3372 );
3373 }
3374
3375 let current_cell = &mut action_table[state_idx][symbol_idx];
3376
3377 if !current_cell.iter().any(|a| action_eq(a, &new_action)) {
3379 current_cell.push(new_action.clone());
3381
3382 if current_cell.len() > 1 {
3384 let entry = conflicts_by_state
3385 .entry((state_idx, symbol_idx))
3386 .or_default();
3387 *entry = current_cell.clone();
3388 }
3389 }
3390}
3391
3392pub fn build_lr1_automaton_res(
3397 grammar: &Grammar,
3398 first_follow: &FirstFollowSets,
3399) -> GlrResult<ParseTable> {
3400 build_lr1_automaton(grammar, first_follow)
3401}
3402
3403fn action_eq(a: &Action, b: &Action) -> bool {
3405 match (a, b) {
3406 (Action::Shift(s1), Action::Shift(s2)) => s1 == s2,
3407 (Action::Reduce(r1), Action::Reduce(r2)) => r1 == r2,
3408 (Action::Accept, Action::Accept) => true,
3409 (Action::Error, Action::Error) => true,
3410 (Action::Fork(a1), Action::Fork(a2)) => {
3411 a1.len() == a2.len() && a1.iter().zip(a2).all(|(x, y)| action_eq(x, y))
3412 }
3413 _ => false,
3414 }
3415}
3416
3417#[cfg(test)]
3418mod tests {
3419 use super::*;
3420
3421 #[test]
3422 fn test_lr_item_creation() {
3423 let item = LRItem::new(RuleId(1), 2, SymbolId(3));
3424 assert_eq!(item.rule_id, RuleId(1));
3425 assert_eq!(item.position, 2);
3426 assert_eq!(item.lookahead, SymbolId(3));
3427 }
3428
3429 #[test]
3430 fn test_lr_item_equality() {
3431 let item1 = LRItem::new(RuleId(1), 2, SymbolId(3));
3432 let item2 = LRItem::new(RuleId(1), 2, SymbolId(3));
3433 let item3 = LRItem::new(RuleId(1), 3, SymbolId(3));
3434
3435 assert_eq!(item1, item2);
3436 assert_ne!(item1, item3);
3437
3438 let mut set = std::collections::BTreeSet::new();
3440 set.insert(item1.clone());
3441 assert!(set.contains(&item1));
3442 assert!(set.contains(&item2));
3443 assert!(!set.contains(&item3));
3444 }
3445
3446 #[test]
3447 fn test_item_set_creation() {
3448 let mut item_set = ItemSet::new(StateId(0));
3449 let item = LRItem::new(RuleId(1), 0, SymbolId(0));
3450 item_set.add_item(item.clone());
3451
3452 assert_eq!(item_set.id, StateId(0));
3453 assert!(item_set.items.contains(&item));
3454 assert_eq!(item_set.items.len(), 1);
3455 }
3456
3457 #[test]
3458 fn test_item_set_duplicate_items() {
3459 let mut item_set = ItemSet::new(StateId(0));
3460 let item = LRItem::new(RuleId(1), 0, SymbolId(0));
3461
3462 item_set.add_item(item.clone());
3463 item_set.add_item(item.clone()); assert_eq!(item_set.items.len(), 1);
3467 }
3468
3469 #[test]
3470 fn test_first_follow_empty_grammar() {
3471 let grammar = Grammar::new("test".to_string());
3472 let first_follow = FirstFollowSets::compute(&grammar).unwrap();
3473
3474 assert!(first_follow.first.is_empty());
3475 assert!(first_follow.follow.is_empty());
3476 }
3477
3478 #[test]
3479 fn test_first_follow_simple_grammar() {
3480 let mut grammar = Grammar::new("test".to_string());
3481
3482 let rule = Rule {
3484 lhs: SymbolId(0), rhs: vec![Symbol::Terminal(SymbolId(1))], precedence: None,
3487 associativity: None,
3488 fields: vec![],
3489 production_id: ProductionId(0),
3490 };
3491 grammar.rules.entry(SymbolId(0)).or_default().push(rule);
3492
3493 let token = Token {
3495 name: "a".to_string(),
3496 pattern: TokenPattern::String("a".to_string()),
3497 fragile: false,
3498 };
3499 grammar.tokens.insert(SymbolId(1), token);
3500
3501 let first_follow = FirstFollowSets::compute(&grammar).unwrap();
3502
3503 assert!(first_follow.first.contains_key(&SymbolId(0)));
3505 if let Some(first_s) = first_follow.first(SymbolId(0)) {
3506 assert!(first_s.contains(1)); }
3508
3509 assert!(!first_follow.is_nullable(SymbolId(0)));
3511 }
3512
3513 #[test]
3514 fn test_first_follow_nullable_rule() {
3515 let mut grammar = Grammar::new("test".to_string());
3516
3517 let rule = Rule {
3519 lhs: SymbolId(0), rhs: vec![], precedence: None,
3522 associativity: None,
3523 fields: vec![],
3524 production_id: ProductionId(0),
3525 };
3526 grammar.rules.entry(SymbolId(0)).or_default().push(rule);
3527
3528 let first_follow = FirstFollowSets::compute(&grammar).unwrap();
3529
3530 assert!(first_follow.is_nullable(SymbolId(0)));
3532 }
3533
3534 #[test]
3535 fn test_first_of_sequence() {
3536 let mut grammar = Grammar::new("test".to_string());
3537
3538 let token_a = Token {
3540 name: "a".to_string(),
3541 pattern: TokenPattern::String("a".to_string()),
3542 fragile: false,
3543 };
3544 grammar.tokens.insert(SymbolId(1), token_a);
3545
3546 let token_b = Token {
3547 name: "b".to_string(),
3548 pattern: TokenPattern::String("b".to_string()),
3549 fragile: false,
3550 };
3551 grammar.tokens.insert(SymbolId(2), token_b);
3552
3553 let first_follow = FirstFollowSets::compute(&grammar).unwrap();
3554
3555 let sequence = vec![Symbol::Terminal(SymbolId(1)), Symbol::Terminal(SymbolId(2))];
3557 let first_seq = first_follow.first_of_sequence(&sequence).unwrap();
3558
3559 assert!(first_seq.contains(1));
3561 assert!(!first_seq.contains(2));
3562 }
3563
3564 #[test]
3565 fn test_action_types() {
3566 let shift = Action::Shift(StateId(1));
3567 let reduce = Action::Reduce(RuleId(2));
3568 let accept = Action::Accept;
3569 let error = Action::Error;
3570 let fork = Action::Fork(vec![shift.clone(), reduce.clone()]);
3571
3572 match shift {
3573 Action::Shift(StateId(1)) => {}
3574 _ => panic!("Expected shift action"),
3575 }
3576
3577 match reduce {
3578 Action::Reduce(RuleId(2)) => {}
3579 _ => panic!("Expected reduce action"),
3580 }
3581
3582 match accept {
3583 Action::Accept => {}
3584 _ => panic!("Expected accept action"),
3585 }
3586
3587 match error {
3588 Action::Error => {}
3589 _ => panic!("Expected error action"),
3590 }
3591
3592 match fork {
3593 Action::Fork(actions) => {
3594 assert_eq!(actions.len(), 2);
3595 assert_eq!(actions[0], shift);
3596 assert_eq!(actions[1], reduce);
3597 }
3598 _ => panic!("Expected fork action"),
3599 }
3600 }
3601
3602 #[test]
3603 fn test_action_equality() {
3604 let shift1 = Action::Shift(StateId(1));
3605 let shift2 = Action::Shift(StateId(1));
3606 let shift3 = Action::Shift(StateId(2));
3607
3608 assert_eq!(shift1, shift2);
3609 assert_ne!(shift1, shift3);
3610
3611 let reduce1 = Action::Reduce(RuleId(1));
3612 let reduce2 = Action::Reduce(RuleId(1));
3613
3614 assert_eq!(reduce1, reduce2);
3615 assert_ne!(shift1, reduce1);
3616 }
3617
3618 #[test]
3619 fn test_symbol_metadata() {
3620 let metadata = SymbolMetadata {
3621 name: "expression".to_string(),
3622 is_visible: true,
3623 is_named: true,
3624 is_supertype: false,
3625 is_terminal: false,
3627 is_extra: false,
3628 is_fragile: false,
3629 symbol_id: SymbolId(1),
3630 };
3631
3632 assert_eq!(metadata.name, "expression");
3633 assert!(metadata.is_visible);
3634 assert!(metadata.is_named);
3635 assert!(!metadata.is_supertype);
3636 assert!(!metadata.is_terminal);
3637 assert!(!metadata.is_extra);
3638 assert!(!metadata.is_fragile);
3639 assert_eq!(metadata.symbol_id, SymbolId(1));
3640 }
3641
3642 #[test]
3643 fn test_conflict_types() {
3644 let shift_reduce = ConflictType::ShiftReduce;
3645 let reduce_reduce = ConflictType::ReduceReduce;
3646
3647 assert_eq!(shift_reduce, ConflictType::ShiftReduce);
3648 assert_eq!(reduce_reduce, ConflictType::ReduceReduce);
3649 assert_ne!(shift_reduce, reduce_reduce);
3650 }
3651
3652 #[test]
3653 fn test_conflict_creation() {
3654 let conflict = Conflict {
3655 state: StateId(5),
3656 symbol: SymbolId(10),
3657 actions: vec![Action::Shift(StateId(1)), Action::Reduce(RuleId(2))],
3658 conflict_type: ConflictType::ShiftReduce,
3659 };
3660
3661 assert_eq!(conflict.state, StateId(5));
3662 assert_eq!(conflict.symbol, SymbolId(10));
3663 assert_eq!(conflict.actions.len(), 2);
3664 assert_eq!(conflict.conflict_type, ConflictType::ShiftReduce);
3665 }
3666
3667 #[test]
3668 fn test_conflict_resolver_creation() {
3669 let resolver = ConflictResolver { conflicts: vec![] };
3670
3671 assert!(resolver.conflicts.is_empty());
3672 }
3673
3674 #[test]
3675 fn test_parse_table_creation() {
3676 let parse_table = ParseTable {
3677 action_table: vec![vec![vec![Action::Error]; 5]; 3], goto_table: vec![vec![StateId(0); 5]; 3],
3679 symbol_metadata: vec![],
3680 state_count: 3,
3681 symbol_count: 5,
3682 symbol_to_index: BTreeMap::new(),
3683 index_to_symbol: vec![],
3684 external_scanner_states: vec![],
3685 rules: vec![],
3686 nonterminal_to_index: BTreeMap::new(),
3687 goto_indexing: GotoIndexing::NonterminalMap,
3688 eof_symbol: SymbolId(0),
3689 start_symbol: SymbolId(1),
3690 grammar: Grammar::new("test".to_string()),
3691 initial_state: StateId(0),
3692 token_count: 3,
3693 external_token_count: 0,
3694 lex_modes: vec![
3695 LexMode {
3696 lex_state: 0,
3697 external_lex_state: 0
3698 };
3699 3
3700 ],
3701 extras: vec![],
3702 dynamic_prec_by_rule: vec![],
3703 rule_assoc_by_rule: vec![],
3704 alias_sequences: vec![],
3705 field_names: vec![],
3706 field_map: BTreeMap::new(),
3707 };
3708
3709 assert_eq!(parse_table.state_count, 3);
3710 assert_eq!(parse_table.symbol_count, 5);
3711 assert_eq!(parse_table.action_table.len(), 3);
3712 assert_eq!(parse_table.goto_table.len(), 3);
3713 assert_eq!(parse_table.action_table[0].len(), 5);
3714 assert_eq!(parse_table.goto_table[0].len(), 5);
3715 }
3716
3717 #[test]
3718 fn test_lr_item_reduce_check() {
3719 let mut grammar = Grammar::new("test".to_string());
3720
3721 let rule = Rule {
3723 lhs: SymbolId(0),
3724 rhs: vec![Symbol::Terminal(SymbolId(1)), Symbol::Terminal(SymbolId(2))],
3725 precedence: None,
3726 associativity: None,
3727 fields: vec![],
3728 production_id: ProductionId(0),
3729 };
3730 grammar.rules.entry(SymbolId(0)).or_default().push(rule);
3731
3732 let item1 = LRItem::new(RuleId(0), 0, SymbolId(0));
3734 assert!(!item1.is_reduce_item(&grammar));
3735
3736 let item2 = LRItem::new(RuleId(0), 1, SymbolId(0));
3738 assert!(!item2.is_reduce_item(&grammar));
3739
3740 let item3 = LRItem::new(RuleId(0), 2, SymbolId(0));
3742 assert!(item3.is_reduce_item(&grammar));
3743 }
3744
3745 #[test]
3746 fn test_lr_item_next_symbol() {
3747 let mut grammar = Grammar::new("test".to_string());
3748
3749 let rule = Rule {
3751 lhs: SymbolId(0),
3752 rhs: vec![Symbol::Terminal(SymbolId(1)), Symbol::Terminal(SymbolId(2))],
3753 precedence: None,
3754 associativity: None,
3755 fields: vec![],
3756 production_id: ProductionId(0),
3757 };
3758 grammar.rules.entry(SymbolId(0)).or_default().push(rule);
3759
3760 let item1 = LRItem::new(RuleId(0), 0, SymbolId(0));
3762 if let Some(symbol) = item1.next_symbol(&grammar) {
3763 match symbol {
3764 Symbol::Terminal(SymbolId(1)) => {}
3765 _ => panic!("Expected terminal symbol with id 1"),
3766 }
3767 } else {
3768 panic!("Expected next symbol");
3769 }
3770
3771 let item2 = LRItem::new(RuleId(0), 1, SymbolId(0));
3773 if let Some(symbol) = item2.next_symbol(&grammar) {
3774 match symbol {
3775 Symbol::Terminal(SymbolId(2)) => {}
3776 _ => panic!("Expected terminal symbol with id 2"),
3777 }
3778 } else {
3779 panic!("Expected next symbol");
3780 }
3781
3782 let item3 = LRItem::new(RuleId(0), 2, SymbolId(0));
3784 assert!(item3.next_symbol(&grammar).is_none());
3785 }
3786
3787 #[test]
3788 fn test_item_set_collection_creation() {
3789 let collection = ItemSetCollection {
3790 sets: vec![],
3791 goto_table: IndexMap::new(),
3792 symbol_is_terminal: IndexMap::new(),
3793 };
3794
3795 assert!(collection.sets.is_empty());
3796 assert!(collection.goto_table.is_empty());
3797 }
3798
3799 #[test]
3800 fn test_glr_error_types() {
3801 let grammar_error = GLRError::GrammarError(GrammarError::InvalidFieldOrdering);
3802 let conflict_error = GLRError::ConflictResolution("Test conflict".to_string());
3803 let state_error = GLRError::StateMachine("Test state machine error".to_string());
3804
3805 match grammar_error {
3806 GLRError::GrammarError(_) => {}
3807 _ => panic!("Expected grammar error"),
3808 }
3809
3810 match conflict_error {
3811 GLRError::ConflictResolution(msg) => assert_eq!(msg, "Test conflict"),
3812 _ => panic!("Expected conflict resolution error"),
3813 }
3814
3815 match state_error {
3816 GLRError::StateMachine(msg) => assert_eq!(msg, "Test state machine error"),
3817 _ => panic!("Expected state machine error"),
3818 }
3819 }
3820
3821 #[test]
3822 fn test_item_set_equality() {
3823 let mut set1 = ItemSet::new(StateId(0));
3824 let mut set2 = ItemSet::new(StateId(1));
3825
3826 let item1 = LRItem::new(RuleId(1), 0, SymbolId(0));
3827 let item2 = LRItem::new(RuleId(2), 1, SymbolId(1));
3828
3829 set1.add_item(item1.clone());
3830 set1.add_item(item2.clone());
3831
3832 set2.add_item(item1);
3833 set2.add_item(item2);
3834
3835 assert_eq!(set1.items, set2.items);
3837 assert_ne!(set1.id, set2.id);
3838 }
3839
3840 #[test]
3841 fn test_recursive_fork_normalization() {
3842 let mut action = Action::Fork(vec![
3844 Action::Fork(vec![
3845 Action::Reduce(RuleId(3)),
3846 Action::Shift(StateId(2)),
3847 Action::Reduce(RuleId(1)),
3848 ]),
3849 Action::Shift(StateId(1)),
3850 Action::Fork(vec![
3851 Action::Accept,
3852 Action::Shift(StateId(4)),
3853 Action::Error,
3854 ]),
3855 ]);
3856
3857 normalize_action(&mut action);
3859
3860 if let Action::Fork(ref actions) = action {
3862 if let Action::Fork(ref inner) = actions[0] {
3864 assert_eq!(inner.len(), 3);
3865 assert!(matches!(inner[0], Action::Shift(StateId(2))));
3866 assert!(matches!(inner[1], Action::Reduce(RuleId(1))));
3867 assert!(matches!(inner[2], Action::Reduce(RuleId(3))));
3868 }
3869
3870 if let Action::Fork(ref inner) = actions[2] {
3872 assert_eq!(inner.len(), 3);
3873 assert!(matches!(inner[0], Action::Shift(StateId(4))));
3874 assert!(matches!(inner[1], Action::Accept));
3875 assert!(matches!(inner[2], Action::Error));
3876 }
3877 } else {
3878 panic!("Expected Fork action");
3879 }
3880 }
3881}