#![forbid(unsafe_op_in_unsafe_fn)]
#![deny(private_interfaces)]
#![cfg_attr(feature = "strict_api", deny(unreachable_pub))]
#![cfg_attr(not(feature = "strict_api"), warn(unreachable_pub))]
#![cfg_attr(feature = "strict_docs", warn(missing_docs))]
#![cfg_attr(not(feature = "strict_docs"), allow(missing_docs))]
#![allow(
clippy::ptr_arg,
clippy::explicit_counter_loop,
clippy::needless_range_loop,
clippy::unused_enumerate_index
)]
use adze_ir::*;
use fixedbitset::FixedBitSet;
use indexmap::IndexMap;
use serde::{Deserialize, Serialize};
use std::collections::{BTreeMap, BTreeSet};
pub mod error;
pub use error::Result as GlrResult;
pub use GLRError as GlrError;
pub mod conflict_inspection;
pub use adze_ir::{Grammar, RuleId, StateId, SymbolId};
pub mod prelude {
pub use crate::{FirstFollowSets, ParseTable, build_lr1_automaton};
}
#[doc(hidden)]
pub mod advanced_conflict;
#[doc(hidden)]
pub mod conflict_resolution;
#[doc(hidden)]
pub mod conflict_visualizer;
#[doc(hidden)]
pub mod disambiguation;
#[doc(hidden)]
pub mod gss;
#[doc(hidden)]
pub mod gss_arena;
#[doc(hidden)]
pub mod parse_forest;
pub mod driver;
pub mod forest_view;
pub mod stack;
pub mod telemetry;
pub mod ts_lexer;
#[cfg(feature = "serialization")]
pub mod serialization;
#[cfg(any(feature = "glr_trace", feature = "debug_glr"))]
#[macro_export]
macro_rules! debug_trace {
($($t:tt)*) => { eprintln!("[GLR] {}", format!($($t)*)); }
}
#[cfg(not(any(feature = "glr_trace", feature = "debug_glr")))]
#[macro_export]
macro_rules! debug_trace {
($($t:tt)*) => {};
}
#[cfg(any(feature = "glr_trace", feature = "debug_glr"))]
#[macro_export]
macro_rules! glr_trace {
($($t:tt)*) => { debug_trace!($($t)*); }
}
#[cfg(not(any(feature = "glr_trace", feature = "debug_glr")))]
#[macro_export]
macro_rules! glr_trace {
($($t:tt)*) => { debug_trace!($($t)*); }
}
#[doc(hidden)]
pub mod perf_optimizations;
#[doc(hidden)]
pub mod precedence_compare;
#[doc(hidden)]
pub mod symbol_comparison;
#[doc(hidden)]
pub mod version_info;
pub mod lib_v2;
#[cfg(any(test, feature = "test-api"))]
pub mod test_helpers;
#[cfg(test)]
pub mod test_symbol_alloc;
#[doc(hidden)]
pub use advanced_conflict::{
ConflictAnalyzer, ConflictStats, PrecedenceDecision, PrecedenceResolver,
};
#[doc(hidden)]
pub use conflict_resolution::{RuntimeConflictResolver, VecWrapperResolver};
#[doc(hidden)]
pub use conflict_visualizer::{ConflictVisualizer, generate_dot_graph};
#[doc(hidden)]
pub use gss::{GSSStats, GraphStructuredStack, StackNode};
#[doc(hidden)]
pub use parse_forest::{ForestNode, ParseError, ParseForest, ParseNode, ParseTree};
#[doc(hidden)]
pub use perf_optimizations::{ParseTableCache, PerfStats, StackDeduplicator, StackPool};
#[doc(hidden)]
pub use precedence_compare::{
PrecedenceComparison, PrecedenceInfo, StaticPrecedenceResolver, compare_precedences,
};
#[doc(hidden)]
pub use symbol_comparison::{compare_symbols, compare_versions_with_symbols};
#[doc(hidden)]
pub use version_info::{CompareResult, VersionInfo, compare_versions};
const EOF_SENTINEL: SymbolId = SymbolId(0);
#[inline]
fn map_follow_symbol(sym: SymbolId, eof_symbol: SymbolId) -> SymbolId {
if sym == EOF_SENTINEL { eof_symbol } else { sym }
}
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum Assoc {
Left,
Right,
None,
}
#[derive(Copy, Clone, Debug)]
struct TokPrec {
prec: u8,
assoc: Assoc,
}
#[derive(Copy, Clone, Debug)]
struct RulePrec {
prec: u8,
assoc: Assoc,
}
struct PrecTables {
tok_prec_by_index: Vec<Option<TokPrec>>,
rule_prec: Vec<RulePrec>,
}
fn build_prec_tables(
grammar: &Grammar,
symbol_to_index: &BTreeMap<SymbolId, usize>,
token_count: u32,
production_count: u32,
) -> PrecTables {
use adze_ir::{Associativity, PrecedenceKind};
debug_assert!(production_count > 0, "production_count must be positive");
let mut tok_prec_by_index = vec![None; symbol_to_index.len()];
let tok_prec_len = tok_prec_by_index.len();
let mut set_tok_prec = |tok_idx: usize, new: TokPrec| {
if tok_idx >= tok_prec_by_index.len() {
return; }
tok_prec_by_index[tok_idx] = match tok_prec_by_index[tok_idx] {
None => Some(new),
Some(old) => Some(if new.prec > old.prec { new } else { old }),
};
};
let mut rule_prec = vec![
RulePrec {
prec: 0,
assoc: Assoc::None
};
production_count as usize
];
for rules in grammar.rules.values() {
for rule in rules {
let pid = rule.production_id.0 as usize;
if pid >= production_count as usize {
continue;
}
let explicit = rule.precedence.and_then(|p| {
if let PrecedenceKind::Static(level) = p {
Some(level as u8)
} else {
None
}
});
let rule_assoc = rule
.associativity
.map(|assoc| match assoc {
Associativity::Left => Assoc::Left,
Associativity::Right => Assoc::Right,
Associativity::None => Assoc::None,
})
.unwrap_or(Assoc::None);
if let Some(level) = explicit {
let tok_idx_opt = rule.rhs.iter().rev().find_map(|sym| {
if let Symbol::Terminal(id) = sym {
symbol_to_index.get(id).copied()
} else {
None
}
});
if let Some(tok_idx) = tok_idx_opt
&& tok_idx < tok_prec_len
{
set_tok_prec(
tok_idx,
TokPrec {
prec: level,
assoc: rule_assoc,
},
);
}
}
rule_prec[pid] = RulePrec {
prec: explicit.unwrap_or(0),
assoc: rule_assoc,
};
}
}
for rules in grammar.rules.values() {
for rule in rules {
let pid = rule.production_id.0 as usize;
if pid >= production_count as usize {
continue;
}
if rule_prec[pid].prec > 0 {
continue;
}
let derived = rule
.rhs
.iter()
.rev()
.find_map(|sym| {
if let Symbol::Terminal(id) = sym {
symbol_to_index.get(id).and_then(|&idx| {
if (idx as u32) < token_count {
tok_prec_by_index[idx]
} else {
None
}
})
} else {
None
}
})
.unwrap_or(TokPrec {
prec: 0,
assoc: Assoc::None,
});
rule_prec[pid] = RulePrec {
prec: derived.prec,
assoc: derived.assoc,
};
}
}
PrecTables {
tok_prec_by_index,
rule_prec,
}
}
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum PrecDecision {
PreferShift,
PreferReduce,
Error,
NoInfo,
}
#[inline]
fn decide_with_precedence(
lookahead_tok_idx: usize, reduce_prod_id: u16, prec: &PrecTables,
) -> PrecDecision {
if reduce_prod_id as usize >= prec.rule_prec.len() {
return PrecDecision::NoInfo;
}
let tokp = match prec
.tok_prec_by_index
.get(lookahead_tok_idx)
.and_then(|o| *o)
{
Some(p) => p,
None => return PrecDecision::NoInfo,
};
let rulep = prec.rule_prec[reduce_prod_id as usize];
if tokp.prec == 0 || rulep.prec == 0 {
return PrecDecision::NoInfo;
}
use core::cmp::Ordering::*;
match (tokp.prec.cmp(&rulep.prec), rulep.assoc) {
(Greater, _) => PrecDecision::PreferShift,
(Less, _) => PrecDecision::PreferReduce,
(Equal, Assoc::Left) => PrecDecision::PreferReduce, (Equal, Assoc::Right) => PrecDecision::PreferShift, (Equal, Assoc::None) => PrecDecision::Error,
}
}
#[inline]
fn decide_reduce_reduce(a: u16, b: u16, prec: &PrecTables) -> u16 {
let pa = prec.rule_prec.get(a as usize).map(|r| r.prec).unwrap_or(0);
let pb = prec.rule_prec.get(b as usize).map(|r| r.prec).unwrap_or(0);
if pa > pb {
a
} else if pb > pa {
b
} else {
a.min(b)
}
}
pub use driver::Driver;
pub use forest_view::{Forest, ForestView, Span};
#[cfg(feature = "perf_counters")]
#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
pub mod perf {
use std::sync::atomic::{AtomicU64, Ordering};
#[derive(Clone, Debug, Default)]
pub struct Counters {
pub shifts: u64,
pub reductions: u64,
pub forks: u64,
pub merges: u64,
}
static SHIFTS: AtomicU64 = AtomicU64::new(0);
static REDUCTIONS: AtomicU64 = AtomicU64::new(0);
static FORKS: AtomicU64 = AtomicU64::new(0);
static MERGES: AtomicU64 = AtomicU64::new(0);
#[inline]
pub fn inc_shifts(n: u64) {
SHIFTS.fetch_add(n, Ordering::Relaxed);
}
#[inline]
pub fn inc_reductions(n: u64) {
REDUCTIONS.fetch_add(n, Ordering::Relaxed);
}
#[inline]
pub fn inc_forks(n: u64) {
FORKS.fetch_add(n, Ordering::Relaxed);
}
#[inline]
pub fn inc_merges(n: u64) {
MERGES.fetch_add(n, Ordering::Relaxed);
}
pub fn snapshot() -> Counters {
Counters {
shifts: SHIFTS.load(Ordering::Relaxed),
reductions: REDUCTIONS.load(Ordering::Relaxed),
forks: FORKS.load(Ordering::Relaxed),
merges: MERGES.load(Ordering::Relaxed),
}
}
pub fn take() -> Counters {
Counters {
shifts: SHIFTS.swap(0, Ordering::Relaxed),
reductions: REDUCTIONS.swap(0, Ordering::Relaxed),
forks: FORKS.swap(0, Ordering::Relaxed),
merges: MERGES.swap(0, Ordering::Relaxed),
}
}
pub fn reset() {
SHIFTS.store(0, Ordering::Relaxed);
REDUCTIONS.store(0, Ordering::Relaxed);
FORKS.store(0, Ordering::Relaxed);
MERGES.store(0, Ordering::Relaxed);
}
}
#[cfg(not(feature = "perf_counters"))]
#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
pub mod perf {
#[derive(Clone, Debug, Default)]
pub struct Counters {
pub shifts: u64,
pub reductions: u64,
pub forks: u64,
pub merges: u64,
}
#[inline(always)]
pub fn inc_shifts(_: u64) {}
#[inline(always)]
pub fn inc_reductions(_: u64) {}
#[inline(always)]
pub fn inc_forks(_: u64) {}
#[inline(always)]
pub fn inc_merges(_: u64) {}
#[inline(always)]
pub fn snapshot() -> Counters {
Counters::default()
}
#[inline(always)]
pub fn take() -> Counters {
Counters::default()
}
#[inline(always)]
pub fn reset() {}
}
#[derive(Debug, Clone)]
pub struct FirstFollowSets {
first: IndexMap<SymbolId, FixedBitSet>,
follow: IndexMap<SymbolId, FixedBitSet>,
nullable: FixedBitSet,
#[allow(dead_code)]
symbol_count: usize,
}
impl FirstFollowSets {
fn get_max_symbol_id(symbol: &Symbol) -> u16 {
match symbol {
Symbol::Terminal(id) | Symbol::NonTerminal(id) | Symbol::External(id) => id.0,
Symbol::Optional(inner) | Symbol::Repeat(inner) | Symbol::RepeatOne(inner) => {
Self::get_max_symbol_id(inner)
}
Symbol::Choice(choices) => choices
.iter()
.map(Self::get_max_symbol_id)
.max()
.unwrap_or(0),
Symbol::Sequence(seq) => seq.iter().map(Self::get_max_symbol_id).max().unwrap_or(0),
Symbol::Epsilon => 0,
}
}
#[must_use = "computation result must be checked"]
pub fn compute_normalized(grammar: &mut Grammar) -> Result<Self, GLRError> {
grammar.normalize();
Self::compute(grammar)
}
#[must_use = "computation result must be checked"]
pub fn compute(grammar: &Grammar) -> Result<Self, GLRError> {
let normalized_grammar = {
let mut cloned = grammar.clone();
let _ = cloned.normalize(); cloned
};
let grammar = &normalized_grammar;
let max_rule_id = grammar.rules.keys().map(|id| id.0).max().unwrap_or(0);
let max_token_id = grammar.tokens.keys().map(|id| id.0).max().unwrap_or(0);
let max_external_id = grammar
.externals
.iter()
.map(|e| e.symbol_id.0)
.max()
.unwrap_or(0);
let mut max_rhs_id = 0u16;
for rules in grammar.rules.values() {
for rule in rules {
for symbol in &rule.rhs {
max_rhs_id = max_rhs_id.max(Self::get_max_symbol_id(symbol));
}
}
}
let symbol_count = (max_rule_id
.max(max_token_id)
.max(max_external_id)
.max(max_rhs_id)
+ 2) as usize;
let mut first = IndexMap::new();
let mut follow = IndexMap::new();
let mut nullable = FixedBitSet::with_capacity(symbol_count);
for &symbol_id in grammar.rules.keys().chain(grammar.tokens.keys()) {
first.insert(symbol_id, FixedBitSet::with_capacity(symbol_count));
follow.insert(symbol_id, FixedBitSet::with_capacity(symbol_count));
}
let mut changed = true;
while changed {
changed = false;
for rule in grammar.all_rules() {
let lhs = rule.lhs;
let mut rule_nullable = true;
for symbol in &rule.rhs {
match symbol {
Symbol::Terminal(id) => {
if let Some(first_set) = first.get_mut(&lhs)
&& !first_set.contains(id.0 as usize)
{
first_set.insert(id.0 as usize);
changed = true;
}
rule_nullable = false;
break;
}
Symbol::NonTerminal(id) | Symbol::External(id) => {
if let Some(symbol_first) = first.get(id).cloned()
&& let Some(lhs_first) = first.get_mut(&lhs)
{
let old_len = lhs_first.count_ones(..);
lhs_first.union_with(&symbol_first);
if lhs_first.count_ones(..) > old_len {
changed = true;
}
}
if !nullable.contains(id.0 as usize) {
rule_nullable = false;
break;
}
}
Symbol::Epsilon => {
}
Symbol::Optional(_)
| Symbol::Repeat(_)
| Symbol::RepeatOne(_)
| Symbol::Choice(_)
| Symbol::Sequence(_) => {
return Err(GLRError::ComplexSymbolsNotNormalized {
operation: "FIRST/FOLLOW computation".to_string(),
});
}
}
}
if rule_nullable && !nullable.contains(lhs.0 as usize) {
nullable.insert(lhs.0 as usize);
changed = true;
}
}
}
if let Some(start_symbol) = grammar.start_symbol()
&& let Some(follow_set) = follow.get_mut(&start_symbol)
{
follow_set.insert(0); }
changed = true;
while changed {
changed = false;
for rule in grammar.all_rules() {
if rule.rhs.len() >= 2
&& let (Symbol::NonTerminal(first_id), Symbol::NonTerminal(second_id)) =
(&rule.rhs[0], &rule.rhs[1])
&& *first_id == rule.lhs
{
if let Some(first_of_second) = first.get(second_id)
&& let Some(follow_set) = follow.get_mut(&rule.lhs)
{
let old_len = follow_set.count_ones(..);
follow_set.union_with(first_of_second);
if follow_set.count_ones(..) > old_len {
changed = true;
}
}
}
for (i, symbol) in rule.rhs.iter().enumerate() {
if let Symbol::NonTerminal(id) | Symbol::External(id) = symbol {
let remaining = &rule.rhs[i + 1..];
let first_of_remaining =
Self::first_of_sequence_static(remaining, &first, &nullable)?;
if let Some(follow_set) = follow.get_mut(id) {
let old_len = follow_set.count_ones(..);
follow_set.union_with(&first_of_remaining);
if follow_set.count_ones(..) > old_len {
changed = true;
}
}
if Self::sequence_is_nullable(remaining, &nullable)
&& let Some(lhs_follow) = follow.get(&rule.lhs).cloned()
&& let Some(follow_set) = follow.get_mut(id)
{
let old_len = follow_set.count_ones(..);
follow_set.union_with(&lhs_follow);
if follow_set.count_ones(..) > old_len {
changed = true;
}
}
}
}
}
}
Ok(Self {
first,
follow,
nullable,
symbol_count,
})
}
#[must_use = "computation result must be checked"]
pub fn first_of_sequence(&self, symbols: &[Symbol]) -> Result<FixedBitSet, GLRError> {
Self::first_of_sequence_static(symbols, &self.first, &self.nullable)
}
fn first_of_sequence_static(
symbols: &[Symbol],
first: &IndexMap<SymbolId, FixedBitSet>,
nullable: &FixedBitSet,
) -> Result<FixedBitSet, GLRError> {
let mut result = FixedBitSet::with_capacity(nullable.len());
for symbol in symbols {
match symbol {
Symbol::Terminal(id) => {
result.insert(id.0 as usize);
break;
}
Symbol::Epsilon => {
}
Symbol::NonTerminal(id) | Symbol::External(id) => {
if let Some(symbol_first) = first.get(id) {
result.union_with(symbol_first);
}
if !nullable.contains(id.0 as usize) {
break;
}
}
Symbol::Optional(_)
| Symbol::Repeat(_)
| Symbol::RepeatOne(_)
| Symbol::Choice(_)
| Symbol::Sequence(_) => {
return Err(GLRError::ComplexSymbolsNotNormalized {
operation: "FIRST/FOLLOW computation".to_string(),
});
}
}
}
Ok(result)
}
fn sequence_is_nullable(symbols: &[Symbol], nullable: &FixedBitSet) -> bool {
symbols.iter().all(|symbol| match symbol {
Symbol::Terminal(_) => false,
Symbol::NonTerminal(id) | Symbol::External(id) => nullable.contains(id.0 as usize),
Symbol::Epsilon => true,
Symbol::Optional(_)
| Symbol::Repeat(_)
| Symbol::RepeatOne(_)
| Symbol::Choice(_)
| Symbol::Sequence(_) => {
panic!("Complex symbols should be normalized before FIRST/FOLLOW computation");
}
})
}
pub fn first(&self, symbol: SymbolId) -> Option<&FixedBitSet> {
self.first.get(&symbol)
}
pub fn follow(&self, symbol: SymbolId) -> Option<&FixedBitSet> {
self.follow.get(&symbol)
}
pub fn is_nullable(&self, symbol: SymbolId) -> bool {
self.nullable.contains(symbol.0 as usize)
}
}
#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct LRItem {
pub rule_id: RuleId,
pub position: usize,
pub lookahead: SymbolId,
}
impl LRItem {
pub fn new(rule_id: RuleId, position: usize, lookahead: SymbolId) -> Self {
Self {
rule_id,
position,
lookahead,
}
}
pub fn is_reduce_item(&self, grammar: &Grammar) -> bool {
if let Some(rule) = grammar
.all_rules()
.find(|r| r.production_id.0 == self.rule_id.0)
{
if rule.rhs.len() == 1 && matches!(rule.rhs[0], Symbol::Epsilon) {
return true; }
self.position >= rule.rhs.len()
} else {
false
}
}
pub fn next_symbol<'a>(&self, grammar: &'a Grammar) -> Option<&'a Symbol> {
if let Some(rule) = grammar
.all_rules()
.find(|r| r.production_id.0 == self.rule_id.0)
{
rule.rhs.get(self.position)
} else {
None
}
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ItemSet {
pub items: BTreeSet<LRItem>,
pub id: StateId,
}
impl ItemSet {
pub fn new(id: StateId) -> Self {
Self {
items: BTreeSet::new(),
id,
}
}
pub fn add_item(&mut self, item: LRItem) {
self.items.insert(item);
}
pub fn closure(
&mut self,
grammar: &Grammar,
first_follow: &FirstFollowSets,
) -> Result<(), GLRError> {
let _initial_size = self.items.len();
let mut added = true;
let mut _iteration = 0;
while added {
added = false;
_iteration += 1;
let current_items: Vec<_> = self.items.iter().cloned().collect();
for item in current_items {
if let Some(Symbol::NonTerminal(symbol_id)) = item.next_symbol(grammar) {
if let Some(rules) = grammar.get_rules_for_symbol(*symbol_id) {
for rule in rules {
let mut beta = Vec::new();
if let Some(current_rule) = grammar
.all_rules()
.find(|r| r.production_id.0 == item.rule_id.0)
{
beta.extend_from_slice(¤t_rule.rhs[item.position + 1..]);
}
beta.push(Symbol::Terminal(item.lookahead));
let first_beta_alpha = first_follow.first_of_sequence(&beta)?;
for lookahead_idx in first_beta_alpha.ones() {
let new_item = LRItem::new(
RuleId(rule.production_id.0),
0,
SymbolId(lookahead_idx as u16),
);
if !self.items.contains(&new_item) {
self.items.insert(new_item);
added = true;
if rule.rhs.is_empty() {
}
}
}
}
}
}
}
}
Ok(())
}
pub fn goto(
&self,
symbol: &Symbol,
grammar: &Grammar,
_first_follow: &FirstFollowSets,
) -> ItemSet {
let mut new_set = ItemSet::new(StateId(0));
for item in &self.items {
if let Some(next_sym) = item.next_symbol(grammar)
&& std::mem::discriminant(next_sym) == std::mem::discriminant(symbol)
{
match (next_sym, symbol) {
(Symbol::Terminal(a), Symbol::Terminal(b))
| (Symbol::NonTerminal(a), Symbol::NonTerminal(b))
| (Symbol::External(a), Symbol::External(b))
if a == b =>
{
let new_item = LRItem::new(item.rule_id, item.position + 1, item.lookahead);
new_set.add_item(new_item);
}
_ => {}
}
}
}
let _ = new_set.closure(grammar, _first_follow);
new_set
}
}
#[derive(Debug, Clone)]
#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
pub struct ItemSetCollection {
pub sets: Vec<ItemSet>,
pub goto_table: IndexMap<(StateId, SymbolId), StateId>,
pub symbol_is_terminal: IndexMap<SymbolId, bool>,
}
impl ItemSetCollection {
pub fn build_canonical_collection_augmented(
grammar: &Grammar,
first_follow: &FirstFollowSets,
augmented_start: SymbolId,
_original_start: SymbolId,
eof_symbol: SymbolId,
) -> Self {
let mut collection = Self {
sets: Vec::new(),
goto_table: IndexMap::new(),
symbol_is_terminal: IndexMap::new(),
};
let mut initial_set = ItemSet::new(StateId(0));
if let Some(augmented_rules) = grammar.get_rules_for_symbol(augmented_start) {
for rule in augmented_rules {
let start_item = LRItem::new(
RuleId(rule.production_id.0),
0,
eof_symbol, );
initial_set.add_item(start_item);
}
}
let _ = initial_set.closure(grammar, first_follow);
debug_trace!(
"Initial state 0 after closure has {} items:",
initial_set.items.len()
);
let mut expected_terminals = std::collections::BTreeSet::new();
let mut expected_nonterminals = std::collections::BTreeSet::new();
for item in &initial_set.items {
if let Some(rule) = grammar
.all_rules()
.find(|r| r.production_id.0 == item.rule_id.0)
{
let mut rhs_str = String::new();
for (idx, sym) in rule.rhs.iter().enumerate() {
if idx == item.position {
rhs_str.push_str(" • ");
}
match sym {
Symbol::Terminal(id) => rhs_str.push_str(&format!("T({}) ", id.0)),
Symbol::NonTerminal(id) => rhs_str.push_str(&format!("NT({}) ", id.0)),
_ => rhs_str.push_str("? "),
}
}
if item.position == rule.rhs.len() {
rhs_str.push_str(" • ");
}
debug_trace!(
" Item: NT({}) -> {}, lookahead={}",
rule.lhs.0,
rhs_str,
item.lookahead.0
);
if item.position < rule.rhs.len() {
match &rule.rhs[item.position] {
Symbol::Terminal(t) => {
expected_terminals.insert(*t);
}
Symbol::NonTerminal(nt) => {
expected_nonterminals.insert(*nt);
}
_ => {}
}
}
}
}
debug_trace!("State 0 expects transitions for:");
debug_trace!(" Terminals: {:?}", expected_terminals);
debug_trace!(" Nonterminals: {:?}", expected_nonterminals);
collection.sets.push(initial_set);
let mut state_counter = 1;
let mut i = 0;
while i < collection.sets.len() {
let current_set = collection.sets[i].clone();
for item in ¤t_set.items {
if let Some(rule) = grammar
.all_rules()
.find(|r| r.production_id.0 == item.rule_id.0)
{
let mut rhs_str = String::new();
for (idx, sym) in rule.rhs.iter().enumerate() {
if idx == item.position {
rhs_str.push_str(" • ");
}
rhs_str.push_str(&format!("{:?} ", sym));
}
if item.position == rule.rhs.len() {
rhs_str.push_str(" • ");
}
}
}
let mut symbols = BTreeSet::new();
let mut _terminal_count = 0;
let mut _non_terminal_count = 0;
if i == 0 {
debug_trace!("\n=== State 0 Analysis ===");
debug_trace!("State 0 has {} items:", current_set.items.len());
}
for (_idx, item) in current_set.items.iter().enumerate() {
if i == 0 {
if let Some(rule) = grammar
.all_rules()
.find(|r| r.production_id.0 == item.rule_id.0)
{
let mut item_str = String::new();
item_str.push_str(&format!("NT({}) -> ", rule.lhs.0));
for (pos, sym) in rule.rhs.iter().enumerate() {
if pos == item.position {
item_str.push_str("• ");
}
match sym {
Symbol::Terminal(t) => item_str.push_str(&format!("T({}) ", t.0)),
Symbol::NonTerminal(nt) => {
item_str.push_str(&format!("NT({}) ", nt.0))
}
Symbol::External(e) => item_str.push_str(&format!("EXT({}) ", e.0)),
_ => item_str.push_str(&format!("{:?} ", sym)),
}
}
if item.position == rule.rhs.len() {
item_str.push_str("• ");
}
debug_trace!(" Item {}: {} (rule_id={})", _idx, item_str, item.rule_id.0);
}
}
if let Some(symbol) = item.next_symbol(grammar) {
match symbol {
Symbol::Terminal(_id) => {
_terminal_count += 1;
}
Symbol::NonTerminal(_id) => {
_non_terminal_count += 1;
}
Symbol::External(_id) => {
_terminal_count += 1; }
_ => {}
}
symbols.insert(symbol.clone());
if i == 0 {
debug_trace!(" -> next symbol: {:?}", symbol);
}
}
}
if i == 0 {
debug_trace!("\nState 0 summary:");
debug_trace!(" Total symbols that can be shifted: {}", symbols.len());
debug_trace!(" Terminals: {}", _terminal_count);
debug_trace!(" Non-terminals: {}", _non_terminal_count);
debug_trace!(" Symbols: {:?}\n", symbols);
}
for symbol in symbols {
let goto_set = current_set.goto(&symbol, grammar, first_follow);
if !goto_set.items.is_empty() {
let existing_state = collection
.sets
.iter()
.find(|set| set.items == goto_set.items)
.map(|set| set.id);
let target_state = if let Some(existing_id) = existing_state {
existing_id
} else {
let new_id = StateId(state_counter);
let mut new_set = goto_set;
new_set.id = new_id;
collection.sets.push(new_set);
state_counter += 1;
new_id
};
let symbol_id = match symbol {
Symbol::Terminal(id) | Symbol::NonTerminal(id) | Symbol::External(id) => id,
Symbol::Optional(_)
| Symbol::Repeat(_)
| Symbol::RepeatOne(_)
| Symbol::Choice(_)
| Symbol::Sequence(_)
| Symbol::Epsilon => {
panic!(
"Complex symbols should be normalized before LR item generation"
);
}
};
if current_set.id.0 == 0 {
debug_trace!(
" State 0 GOTO: symbol {:?} -> state {}",
symbol_id,
target_state.0
);
}
collection
.goto_table
.insert((current_set.id, symbol_id), target_state);
let is_terminal = matches!(symbol, Symbol::Terminal(_) | Symbol::External(_));
collection.symbol_is_terminal.insert(symbol_id, is_terminal);
}
}
i += 1;
}
collection
}
pub fn build_canonical_collection(grammar: &Grammar, first_follow: &FirstFollowSets) -> Self {
let mut collection = Self {
sets: Vec::new(),
goto_table: IndexMap::new(),
symbol_is_terminal: IndexMap::new(),
};
let mut initial_set = ItemSet::new(StateId(0));
if let Some(start_symbol) = grammar.start_symbol() {
if let Some(start_rules) = grammar.get_rules_for_symbol(start_symbol) {
for rule in start_rules.iter() {
let start_item = LRItem::new(
RuleId(rule.production_id.0),
0,
SymbolId(0), );
initial_set.add_item(start_item);
}
}
let _ = initial_set.closure(grammar, first_follow);
}
if initial_set.items.is_empty() {
} else {
for _item in &initial_set.items {
}
}
collection.sets.push(initial_set);
let mut state_counter = 1;
let mut i = 0;
while i < collection.sets.len() {
let current_set = collection.sets[i].clone();
for item in ¤t_set.items {
if let Some(rule) = grammar
.all_rules()
.find(|r| r.production_id.0 == item.rule_id.0)
{
let mut rhs_str = String::new();
for (idx, sym) in rule.rhs.iter().enumerate() {
if idx == item.position {
rhs_str.push_str(" • ");
}
rhs_str.push_str(&format!("{:?} ", sym));
}
if item.position == rule.rhs.len() {
rhs_str.push_str(" • ");
}
}
}
let mut symbols = BTreeSet::new();
let mut _terminal_count = 0;
let mut _non_terminal_count = 0;
if i == 0 {
debug_trace!("\n=== State 0 Analysis ===");
debug_trace!("State 0 has {} items:", current_set.items.len());
}
for (_idx, item) in current_set.items.iter().enumerate() {
if i == 0 {
if let Some(rule) = grammar
.all_rules()
.find(|r| r.production_id.0 == item.rule_id.0)
{
let mut item_str = String::new();
item_str.push_str(&format!("NT({}) -> ", rule.lhs.0));
for (pos, sym) in rule.rhs.iter().enumerate() {
if pos == item.position {
item_str.push_str("• ");
}
match sym {
Symbol::Terminal(t) => item_str.push_str(&format!("T({}) ", t.0)),
Symbol::NonTerminal(nt) => {
item_str.push_str(&format!("NT({}) ", nt.0))
}
Symbol::External(e) => item_str.push_str(&format!("EXT({}) ", e.0)),
_ => item_str.push_str(&format!("{:?} ", sym)),
}
}
if item.position == rule.rhs.len() {
item_str.push_str("• ");
}
debug_trace!(" Item {}: {} (rule_id={})", _idx, item_str, item.rule_id.0);
}
}
if let Some(symbol) = item.next_symbol(grammar) {
match symbol {
Symbol::Terminal(_id) => {
_terminal_count += 1;
}
Symbol::NonTerminal(_id) => {
_non_terminal_count += 1;
}
Symbol::External(_id) => {
_terminal_count += 1; }
_ => {}
}
symbols.insert(symbol.clone());
if i == 0 {
debug_trace!(" -> next symbol: {:?}", symbol);
}
}
}
if i == 0 {
debug_trace!("\nState 0 summary:");
debug_trace!(" Total symbols that can be shifted: {}", symbols.len());
debug_trace!(" Terminals: {}", _terminal_count);
debug_trace!(" Non-terminals: {}", _non_terminal_count);
debug_trace!(" Symbols: {:?}\n", symbols);
}
for item in ¤t_set.items {
if let Some(symbol) = item.next_symbol(grammar) {
let _symbol_id = match &symbol {
Symbol::Terminal(id) | Symbol::NonTerminal(id) | Symbol::External(id) => id,
_ => panic!("Complex symbol"),
};
}
}
for symbol in &symbols {
let _symbol_id = match symbol {
Symbol::Terminal(id) | Symbol::NonTerminal(id) | Symbol::External(id) => id,
_ => panic!("Complex symbol"),
};
}
for symbol in symbols {
let goto_set = current_set.goto(&symbol, grammar, first_follow);
if !goto_set.items.is_empty() {
let existing_state = collection
.sets
.iter()
.find(|set| set.items == goto_set.items)
.map(|set| set.id);
let target_state = if let Some(existing_id) = existing_state {
existing_id
} else {
let new_id = StateId(state_counter);
let mut new_set = goto_set;
new_set.id = new_id;
collection.sets.push(new_set);
state_counter += 1;
new_id
};
let symbol_id = match symbol {
Symbol::Terminal(id) | Symbol::NonTerminal(id) | Symbol::External(id) => id,
Symbol::Optional(_)
| Symbol::Repeat(_)
| Symbol::RepeatOne(_)
| Symbol::Choice(_)
| Symbol::Sequence(_)
| Symbol::Epsilon => {
panic!(
"Complex symbols should be normalized before LR item generation"
);
}
};
if current_set.id.0 == 0 {
debug_trace!(
" State 0 GOTO: symbol {:?} -> state {}",
symbol_id,
target_state.0
);
}
collection
.goto_table
.insert((current_set.id, symbol_id), target_state);
let is_terminal = matches!(symbol, Symbol::Terminal(_) | Symbol::External(_));
collection.symbol_is_terminal.insert(symbol_id, is_terminal);
}
}
i += 1;
}
collection
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serialization", derive(Serialize, Deserialize))]
pub struct LexMode {
pub lex_state: u16,
pub external_lex_state: u16,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serialization", derive(Serialize, Deserialize))]
pub enum GotoIndexing {
NonterminalMap,
DirectSymbolId,
}
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serialization", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
pub struct ParseTable {
pub action_table: Vec<Vec<ActionCell>>,
pub goto_table: Vec<Vec<StateId>>,
pub symbol_metadata: Vec<SymbolMetadata>,
pub state_count: usize,
pub symbol_count: usize,
pub symbol_to_index: BTreeMap<SymbolId, usize>,
pub index_to_symbol: Vec<SymbolId>,
pub external_scanner_states: Vec<Vec<bool>>,
pub rules: Vec<ParseRule>,
pub nonterminal_to_index: BTreeMap<SymbolId, usize>,
pub goto_indexing: GotoIndexing,
pub eof_symbol: SymbolId,
pub start_symbol: SymbolId,
pub grammar: Grammar,
pub initial_state: StateId,
pub token_count: usize,
pub external_token_count: usize,
pub lex_modes: Vec<LexMode>,
pub extras: Vec<SymbolId>,
pub dynamic_prec_by_rule: Vec<i16>,
pub rule_assoc_by_rule: Vec<i8>,
pub alias_sequences: Vec<Vec<Option<SymbolId>>>,
pub field_names: Vec<String>,
pub field_map: BTreeMap<(RuleId, u16), u16>,
}
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serialization", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
pub struct ParseRule {
pub lhs: SymbolId,
pub rhs_len: u16,
}
impl Default for ParseTable {
fn default() -> Self {
Self {
action_table: vec![],
goto_table: vec![],
symbol_metadata: vec![],
state_count: 0,
symbol_count: 0,
symbol_to_index: BTreeMap::new(),
index_to_symbol: vec![],
external_scanner_states: vec![],
rules: vec![],
nonterminal_to_index: BTreeMap::new(),
goto_indexing: GotoIndexing::NonterminalMap,
eof_symbol: SymbolId(0),
start_symbol: SymbolId(0),
grammar: Grammar::new("default".to_string()),
initial_state: StateId(0),
token_count: 0,
external_token_count: 0,
lex_modes: vec![],
extras: vec![],
dynamic_prec_by_rule: vec![],
rule_assoc_by_rule: vec![],
alias_sequences: vec![],
field_names: vec![],
field_map: BTreeMap::new(),
}
}
}
impl ParseTable {
pub fn with_detected_goto_indexing(mut self) -> Self {
self.detect_goto_indexing();
self
}
pub fn normalize_eof_to_zero(mut self) -> Self {
if self.eof_symbol == SymbolId(0) {
return self;
}
let old_eof = self.eof_symbol;
#[cfg(debug_assertions)]
debug_trace!("Normalizing EOF from {:?} to SymbolId(0)", old_eof);
let old_idx = self.symbol_to_index.get(&old_eof).copied();
let zero_idx = self.symbol_to_index.get(&SymbolId(0)).copied();
if let (Some(old_idx), Some(zero_idx)) = (old_idx, zero_idx) {
for row in &mut self.action_table {
if old_idx < row.len() && zero_idx < row.len() {
row.swap(old_idx, zero_idx);
}
}
self.symbol_to_index.insert(SymbolId(0), old_idx);
self.symbol_to_index.remove(&old_eof);
if old_idx < self.index_to_symbol.len() {
self.index_to_symbol[old_idx] = SymbolId(0);
}
} else if let Some(old_idx) = old_idx {
self.symbol_to_index.remove(&old_eof);
self.symbol_to_index.insert(SymbolId(0), old_idx);
if old_idx < self.index_to_symbol.len() {
self.index_to_symbol[old_idx] = SymbolId(0);
}
} else {
self.symbol_to_index.insert(SymbolId(0), 0);
}
self.eof_symbol = SymbolId(0);
self
}
pub fn detect_goto_indexing(&mut self) {
let start_nt = self.start_symbol;
let col_map = self
.nonterminal_to_index
.get(&start_nt)
.and_then(|&c| self.goto_table.first().and_then(|row| row.get(c)))
.copied();
let col_direct = self
.goto_table
.first()
.and_then(|row| row.get(start_nt.0 as usize))
.copied();
self.goto_indexing = match (col_map, col_direct) {
(Some(s), _) if s.0 != 0 => GotoIndexing::NonterminalMap,
(_, Some(s)) if s.0 != 0 => GotoIndexing::DirectSymbolId,
_ => GotoIndexing::NonterminalMap,
};
}
#[inline]
pub fn terminal_boundary(&self) -> usize {
self.token_count + self.external_token_count
}
#[inline]
pub fn is_terminal(&self, sym: SymbolId) -> bool {
(sym.0 as usize) < self.terminal_boundary()
}
pub fn valid_symbols(&self, state: StateId) -> Vec<bool> {
let n = self.terminal_boundary();
let mut v = vec![false; n];
let s = state.0 as usize;
if s < self.action_table.len() {
for t in 0..n.min(self.action_table[s].len()) {
v[t] = !self.action_table[s][t].is_empty();
}
}
v
}
#[inline]
pub fn actions(&self, state: StateId, sym: SymbolId) -> &'_ [Action] {
let s = state.0 as usize;
let Some(&col) = self.symbol_to_index.get(&sym) else {
return &[];
};
if s >= self.action_table.len() || col >= self.action_table[s].len() {
return &[];
}
&self.action_table[s][col]
}
#[inline]
pub fn goto(&self, state: StateId, nt: SymbolId) -> Option<StateId> {
let s = state.0 as usize;
let &col = self.nonterminal_to_index.get(&nt)?;
let ns = *self.goto_table.get(s)?.get(col)?;
(ns.0 != u16::MAX).then_some(ns)
}
#[inline]
pub fn rule(&self, id: RuleId) -> (SymbolId, u16) {
let r = &self.rules[id.0 as usize];
(r.lhs, r.rhs_len)
}
#[inline]
pub fn eof(&self) -> SymbolId {
self.eof_symbol
}
#[inline]
pub fn start_symbol(&self) -> SymbolId {
self.start_symbol
}
#[inline]
pub fn grammar(&self) -> &Grammar {
&self.grammar
}
#[inline]
pub fn error_symbol(&self) -> SymbolId {
SymbolId(0)
}
#[inline]
pub fn valid_symbols_mask(&self, state: StateId) -> Vec<bool> {
let n = self.terminal_boundary();
let mut v = vec![false; n];
let s = state.0 as usize;
if s < self.action_table.len() {
for t in 0..n.min(self.action_table[s].len()) {
v[t] = !self.action_table[s][t].is_empty();
}
}
v
}
#[inline]
pub fn lex_mode(&self, state: StateId) -> LexMode {
let idx = state.0 as usize;
if idx < self.lex_modes.len() {
self.lex_modes[idx]
} else {
LexMode {
lex_state: 0,
external_lex_state: 0,
}
}
}
#[inline]
pub fn is_extra(&self, sym: SymbolId) -> bool {
self.extras.contains(&sym)
}
#[must_use = "validation result must be checked"]
pub fn validate(&self) -> Result<(), TableError> {
let terminal_boundary = self.token_count + self.external_token_count;
debug_assert_ne!(
self.eof_symbol,
parse_forest::ERROR_SYMBOL,
"EOF symbol cannot be the ERROR sentinel"
);
if self.eof_symbol == parse_forest::ERROR_SYMBOL {
return Err(TableError::EofIsError);
}
if (self.eof_symbol.0 as usize) < terminal_boundary {
return Err(TableError::EofNotSentinel {
eof: self.eof_symbol.0,
token_count: self.token_count as u32,
external_count: self.external_token_count as u32,
});
}
if !self.symbol_to_index.contains_key(&self.eof_symbol) {
return Err(TableError::EofMissingFromIndex);
}
let tb = self.terminal_boundary();
debug_assert!(
self.extras
.iter()
.all(|&sym| (sym.0 as usize) < self.token_count),
"all extras must be within [0..token_count)"
);
for sym_id in 0..self.token_count {
let sym = SymbolId(sym_id as u16);
debug_assert!(self.is_terminal(sym), "0..token_count are terminals");
debug_assert!(
(sym.0 as usize) < self.token_count,
"regular terminals are in [0..token_count)"
);
}
for sym_id in self.token_count..tb {
let sym = SymbolId(sym_id as u16);
debug_assert!(self.is_terminal(sym), "external tokens are terminals");
debug_assert!(
(sym.0 as usize) >= self.token_count && (sym.0 as usize) < tb,
"external tokens are in [token_count..terminal_boundary)"
);
}
debug_assert!(
self.symbol_to_index.contains_key(&self.eof_symbol),
"EOF must exist in ACTION map"
);
if terminal_boundary > 0 {
let end_symbol = SymbolId((terminal_boundary - 1) as u16);
if let (Some(&eof_col), Some(&end_col)) = (
self.symbol_to_index.get(&self.eof_symbol),
self.symbol_to_index.get(&end_symbol),
) {
for (state_idx, row) in self.action_table.iter().enumerate() {
if eof_col < row.len() && end_col < row.len() {
let eof_actions = &row[eof_col];
let end_actions = &row[end_col];
if eof_actions.is_empty() != end_actions.is_empty() {
return Err(TableError::EofParityMismatch(state_idx as u16));
}
}
}
}
}
Ok(())
}
pub fn remap_goto_to_direct_symbol_id(mut self) -> Self {
if matches!(self.goto_indexing, GotoIndexing::DirectSymbolId) {
return self;
}
let max_sym = self
.nonterminal_to_index
.keys()
.map(|s| s.0 as usize)
.max()
.unwrap_or(0);
let new_width = max_sym + 1;
for row in &mut self.goto_table {
debug_assert!(
self.nonterminal_to_index.values().all(|&c| c < row.len()),
"nonterminal_to_index contains a column >= row width"
);
let mut new_row = vec![StateId(0); new_width];
for (sym, &old_col) in &self.nonterminal_to_index {
if old_col < row.len() {
new_row[sym.0 as usize] = row[old_col];
}
}
*row = new_row;
}
self.goto_indexing = GotoIndexing::DirectSymbolId;
self
}
pub fn remap_goto_to_nonterminal_map(mut self) -> Self {
if matches!(self.goto_indexing, GotoIndexing::NonterminalMap) {
return self;
}
let width = self
.nonterminal_to_index
.values()
.copied()
.max()
.unwrap_or(0)
+ 1;
for row in &mut self.goto_table {
debug_assert!(
self.nonterminal_to_index
.keys()
.all(|s| (s.0 as usize) < row.len()),
"nonterminal_to_index contains a symbol id >= row width"
);
let mut new_row = vec![StateId(0); width];
for (sym, &col) in &self.nonterminal_to_index {
let src = sym.0 as usize;
if src < row.len() && col < new_row.len() {
new_row[col] = row[src];
}
}
*row = new_row;
}
self.goto_indexing = GotoIndexing::NonterminalMap;
self
}
}
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
pub enum Action {
Shift(StateId),
Reduce(RuleId),
Accept,
Error,
Recover,
Fork(Vec<Action>),
}
pub type ActionCell = Vec<Action>;
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serialization", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
pub struct SymbolMetadata {
pub name: String,
pub is_visible: bool,
pub is_named: bool,
pub is_supertype: bool,
pub is_terminal: bool,
pub is_extra: bool,
pub is_fragile: bool,
pub symbol_id: SymbolId,
}
#[derive(Debug, Clone)]
#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
pub struct ConflictResolver {
pub conflicts: Vec<Conflict>,
}
#[derive(Debug, Clone)]
#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
pub struct Conflict {
pub state: StateId,
pub symbol: SymbolId,
pub actions: Vec<Action>,
pub conflict_type: ConflictType,
}
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
pub enum ConflictType {
ShiftReduce,
ReduceReduce,
}
impl ConflictResolver {
pub fn detect_conflicts(
item_sets: &ItemSetCollection,
grammar: &Grammar,
_first_follow: &FirstFollowSets,
) -> Self {
let mut conflicts = Vec::new();
for item_set in &item_sets.sets {
let mut actions_by_symbol: IndexMap<SymbolId, Vec<Action>> = IndexMap::new();
for item in &item_set.items {
if item.is_reduce_item(grammar) {
let mut is_accept = false;
if let Some(start_symbol) = grammar.start_symbol() {
for rule in grammar.all_rules() {
if rule.production_id.0 == item.rule_id.0 {
is_accept =
rule.lhs == start_symbol && item.lookahead == SymbolId(0);
break;
}
}
}
let action = if is_accept {
Action::Accept
} else {
Action::Reduce(item.rule_id)
};
actions_by_symbol
.entry(item.lookahead)
.or_default()
.push(action);
} else if let Some(symbol) = item.next_symbol(grammar) {
let symbol_id = match symbol {
Symbol::Terminal(id) | Symbol::NonTerminal(id) | Symbol::External(id) => {
*id
}
Symbol::Optional(_)
| Symbol::Repeat(_)
| Symbol::RepeatOne(_)
| Symbol::Choice(_)
| Symbol::Sequence(_)
| Symbol::Epsilon => {
panic!(
"Complex symbols should be normalized before LR item generation"
);
}
};
if let Some(target_state) = item_sets.goto_table.get(&(item_set.id, symbol_id))
{
let action = Action::Shift(*target_state);
actions_by_symbol.entry(symbol_id).or_default().push(action);
}
}
}
for (symbol_id, actions) in actions_by_symbol {
if actions.len() > 1 {
let conflict_type = if actions.iter().any(|a| matches!(a, Action::Shift(_)))
&& actions.iter().any(|a| matches!(a, Action::Reduce(_)))
{
ConflictType::ShiftReduce
} else {
ConflictType::ReduceReduce
};
conflicts.push(Conflict {
state: item_set.id,
symbol: symbol_id,
actions,
conflict_type,
});
}
}
}
Self { conflicts }
}
pub fn resolve_conflicts(&mut self, grammar: &Grammar) {
let mut conflicts_to_resolve = self.conflicts.clone();
for conflict in &mut conflicts_to_resolve {
self.resolve_single_conflict(conflict, grammar);
}
self.conflicts = conflicts_to_resolve;
}
fn resolve_single_conflict(&self, conflict: &mut Conflict, grammar: &Grammar) {
match conflict.conflict_type {
ConflictType::ShiftReduce => {
self.resolve_shift_reduce_conflict(conflict, grammar);
}
ConflictType::ReduceReduce => {
self.resolve_reduce_reduce_conflict(conflict, grammar);
}
}
}
fn resolve_shift_reduce_conflict(&self, conflict: &mut Conflict, grammar: &Grammar) {
let precedence_resolver = StaticPrecedenceResolver::from_grammar(grammar);
let mut shift_action = None;
let mut reduce_action = None;
for action in &conflict.actions {
match action {
Action::Shift(_) => shift_action = Some(action.clone()),
Action::Reduce(_) => reduce_action = Some(action.clone()),
_ => {}
}
}
match (shift_action, reduce_action) {
(Some(shift), Some(reduce)) => {
let shift_prec = precedence_resolver.token_precedence(conflict.symbol);
let reduce_prec = if let Action::Reduce(rule_id) = &reduce {
precedence_resolver.rule_precedence(*rule_id)
} else {
None
};
match compare_precedences(shift_prec, reduce_prec) {
PrecedenceComparison::PreferShift => {
conflict.actions = vec![shift];
}
PrecedenceComparison::PreferReduce => {
conflict.actions = vec![reduce];
}
PrecedenceComparison::Error => {
conflict.actions = vec![Action::Fork(vec![shift, reduce])];
}
PrecedenceComparison::None => {
conflict.actions = vec![Action::Fork(vec![shift, reduce])];
}
}
}
_ => {
}
}
}
fn resolve_reduce_reduce_conflict(&self, conflict: &mut Conflict, _grammar: &Grammar) {
let mut best_action = None;
let mut best_rule_id = u16::MAX;
for action in &conflict.actions {
if let Action::Reduce(rule_id) = action
&& rule_id.0 < best_rule_id
{
best_rule_id = rule_id.0;
best_action = Some(action.clone());
}
}
if let Some(action) = best_action {
conflict.actions = vec![action];
}
}
}
#[derive(Debug, thiserror::Error)]
#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
pub enum GLRError {
#[error("Grammar error: {0}")]
GrammarError(#[from] GrammarError),
#[error("Conflict resolution failed: {0}")]
ConflictResolution(String),
#[error("State machine generation failed: {0}")]
StateMachine(String),
#[error("Table validation failed: {0}")]
TableValidation(TableError),
#[error("Complex symbols must be normalized before {operation}")]
ComplexSymbolsNotNormalized { operation: String },
#[error("Expected {expected} symbol, found complex symbol")]
ExpectedSimpleSymbol { expected: String },
#[error("Invalid symbol state during {operation}")]
InvalidSymbolState { operation: String },
}
#[derive(Debug, thiserror::Error)]
#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
pub enum TableError {
#[error("EOF symbol collides with ERROR")]
EofIsError,
#[error(
"EOF symbol must be >= token_count + external_token_count (EOF: {eof}, tokens: {token_count}, externals: {external_count})"
)]
EofNotSentinel {
eof: u16,
token_count: u32,
external_count: u32,
},
#[error("EOF not present in symbol_to_index")]
EofMissingFromIndex,
#[error("EOF column parity mismatch at state {0}")]
EofParityMismatch(u16),
}
#[allow(dead_code)]
fn can_derive_start(grammar: &Grammar, symbol: SymbolId, start: SymbolId) -> bool {
if symbol == start {
return true;
}
if let Some(rules) = grammar.get_rules_for_symbol(symbol) {
for rule in rules {
if rule.rhs.len() == 1
&& let Symbol::NonTerminal(target) = &rule.rhs[0]
&& *target == start
{
return true;
}
}
}
false
}
fn normalize_action_table(action_table: &mut Vec<Vec<ActionCell>>) {
for row in action_table.iter_mut() {
for cell in row.iter_mut() {
normalize_action_cell(cell);
}
}
}
fn normalize_action_cell(cell: &mut ActionCell) {
for action in cell.iter_mut() {
normalize_action(action);
}
cell.sort_by_key(action_sort_key);
cell.dedup();
}
fn normalize_action(action: &mut Action) {
if let Action::Fork(inner) = action {
for inner_action in inner.iter_mut() {
normalize_action(inner_action);
}
inner.sort_by_key(action_sort_key);
inner.dedup();
}
}
fn action_sort_key(action: &Action) -> (u8, u16, u16, u16) {
match action {
Action::Shift(s) => (0, s.0, 0, 0),
Action::Reduce(r) => (1, r.0, 0, 0),
Action::Accept => (2, 0, 0, 0),
Action::Error => (3, 0, 0, 0),
Action::Recover => (4, 0, 0, 0),
Action::Fork(inner) => {
let first = inner.first().map(action_sort_key).unwrap_or((0, 0, 0, 0));
(5, first.1, first.2, inner.len() as u16)
}
}
}
pub fn build_lr1_automaton(
grammar: &Grammar,
first_follow: &FirstFollowSets,
) -> Result<ParseTable, GLRError> {
let mut rule_count = 0;
for rule in grammar.all_rules() {
if rule_count >= 10 {
break;
}
let mut rhs_str = String::new();
for sym in &rule.rhs {
match sym {
Symbol::Terminal(id) => rhs_str.push_str(&format!("T({}) ", id.0)),
Symbol::NonTerminal(id) => rhs_str.push_str(&format!("NT({}) ", id.0)),
_ => rhs_str.push_str("? "),
}
}
rule_count += 1;
}
let nonterminal_symbols: BTreeSet<SymbolId> = grammar.rules.keys().copied().collect();
let external_symbols: BTreeSet<SymbolId> =
grammar.externals.iter().map(|e| e.symbol_id).collect();
let mut rhs_terminals: BTreeSet<SymbolId> = BTreeSet::new();
for rule in grammar.all_rules() {
for sym in &rule.rhs {
if let Symbol::Terminal(id) = sym {
rhs_terminals.insert(*id);
}
}
}
let max_symbol = grammar
.tokens
.keys()
.chain(grammar.rule_names.keys())
.chain(nonterminal_symbols.iter())
.chain(external_symbols.iter())
.chain(rhs_terminals.iter())
.map(|s| s.0)
.max()
.unwrap_or(0);
let eof_symbol = SymbolId(max_symbol.checked_add(1).ok_or_else(|| {
GLRError::StateMachine("EOF symbol would overflow u16: grammar has too many symbols".into())
})?);
let mut augmented_grammar = grammar.clone();
let original_start =
grammar
.start_symbol()
.ok_or(GLRError::GrammarError(GrammarError::UnresolvedSymbol(
SymbolId(0),
)))?;
if let Some(_name) = grammar.rule_names.get(&original_start) {}
let augmented_start_id = max_symbol.checked_add(2).ok_or_else(|| {
GLRError::StateMachine(
"Augmented start symbol would overflow u16: grammar has too many symbols".into(),
)
})?;
let augmented_start = SymbolId(augmented_start_id);
let max_production_id = grammar
.all_rules()
.map(|r| r.production_id.0)
.max()
.unwrap_or(0);
let augmented_production_id = max_production_id
.checked_add(1)
.ok_or_else(|| GLRError::StateMachine("Production ID overflow".into()))?;
let augmented_rule = Rule {
lhs: augmented_start,
rhs: vec![Symbol::NonTerminal(original_start)],
precedence: None,
associativity: None,
fields: vec![],
production_id: ProductionId(augmented_production_id),
};
augmented_grammar
.rules
.insert(augmented_start, vec![augmented_rule]);
augmented_grammar
.rule_names
.insert(augmented_start, "$start".to_string());
let collection = ItemSetCollection::build_canonical_collection_augmented(
&augmented_grammar,
first_follow,
augmented_start,
original_start,
eof_symbol,
);
let mut symbol_to_index = BTreeMap::new();
symbol_to_index.insert(eof_symbol, 0);
let mut internal_terminals: BTreeSet<SymbolId> = grammar.tokens.keys().copied().collect();
internal_terminals.extend(rhs_terminals.iter().copied());
internal_terminals.remove(&eof_symbol);
for id in &external_symbols {
internal_terminals.remove(id);
}
for id in &nonterminal_symbols {
internal_terminals.remove(id);
}
let mut internal_tokens: Vec<SymbolId> = internal_terminals.into_iter().collect();
internal_tokens.sort_by_key(|s| s.0);
for &id in &internal_tokens {
if !symbol_to_index.contains_key(&id) {
let idx = symbol_to_index.len();
symbol_to_index.insert(id, idx);
}
}
let mut ext_tokens: Vec<SymbolId> = external_symbols.iter().copied().collect();
ext_tokens.sort_by_key(|s| s.0);
for &id in &ext_tokens {
if !symbol_to_index.contains_key(&id) {
let idx = symbol_to_index.len();
symbol_to_index.insert(id, idx);
}
}
let mut non_terminals: Vec<SymbolId> = nonterminal_symbols.iter().copied().collect();
non_terminals.sort_by_key(|s| s.0);
for id in non_terminals {
if !symbol_to_index.contains_key(&id) {
let idx = symbol_to_index.len();
symbol_to_index.insert(id, idx);
}
}
let mut other_symbols: Vec<SymbolId> = grammar
.rule_names
.keys()
.cloned()
.filter(|id| !symbol_to_index.contains_key(id))
.collect();
other_symbols.sort_by_key(|s| s.0);
if !other_symbols.is_empty() {
return Err(GLRError::StateMachine(format!(
"Unexpected symbols outside terminal/nonterminal partitions: {:?}",
other_symbols
)));
}
let indexed_symbol_count = symbol_to_index.len();
let state_count = collection.sets.len();
let symbol_count = indexed_symbol_count;
let mut action_table = vec![vec![Vec::new(); indexed_symbol_count]; state_count];
let mut goto_table = vec![vec![StateId(0); indexed_symbol_count]; state_count];
let mut conflicts_by_state: BTreeMap<(usize, usize), Vec<Action>> = BTreeMap::new();
let mut rules = Vec::new();
let mut dynamic_prec_by_rule = Vec::new();
let mut rule_assoc_by_rule = Vec::new();
let mut production_to_rule_id = BTreeMap::new();
for (rule_id, rule) in grammar.all_rules().enumerate() {
production_to_rule_id.insert(rule.production_id.0, rule_id as u16);
rules.push(ParseRule {
lhs: rule.lhs,
rhs_len: rule.rhs.len() as u16,
});
let prec = match rule.precedence {
Some(adze_ir::PrecedenceKind::Static(p)) => p,
Some(adze_ir::PrecedenceKind::Dynamic(p)) => p,
None => 0,
};
dynamic_prec_by_rule.push(prec);
let assoc = match rule.associativity {
Some(adze_ir::Associativity::Left) => 1,
Some(adze_ir::Associativity::Right) => -1,
_ => 0,
};
rule_assoc_by_rule.push(assoc);
}
debug_trace!(
"DEBUG: Collection goto table has {} entries",
collection.goto_table.len()
);
debug_trace!(
"DEBUG: Augmented grammar has {} tokens",
augmented_grammar.tokens.len()
);
debug_trace!("=== Symbol Classification Debug ===");
debug_trace!(
"Tokens in augmented_grammar: {:?}",
augmented_grammar
.tokens
.keys()
.map(|k| k.0)
.collect::<Vec<_>>()
);
debug_trace!(
"Externals in augmented_grammar: {:?}",
augmented_grammar
.externals
.iter()
.map(|e| e.symbol_id.0)
.collect::<Vec<_>>()
);
debug_trace!("Original grammar tokens: {}", grammar.tokens.len());
debug_trace!(
"Collection goto_table size: {}",
collection.goto_table.len()
);
let state0_gotos: Vec<_> = collection
.goto_table
.iter()
.filter(|((from, _), _)| from.0 == 0)
.collect();
debug_trace!("State 0 has {} goto entries", state0_gotos.len());
for ((_, _symbol), _to_state) in &state0_gotos {
debug_trace!(" Symbol {} -> State {}", _symbol.0, _to_state.0);
}
let mut _terminal_count = 0;
let mut _non_terminal_count = 0;
for ((from_state, symbol), to_state) in &collection.goto_table {
let is_terminal = collection
.symbol_is_terminal
.get(symbol)
.copied()
.unwrap_or(*symbol == eof_symbol);
if from_state.0 == 0 {
debug_trace!(
"State 0 goto entry: symbol {} -> state {}, is_terminal={} (in tokens={}, in externals={}, is EOF={})",
symbol.0,
to_state.0,
is_terminal,
augmented_grammar.tokens.contains_key(symbol),
augmented_grammar
.externals
.iter()
.any(|e| e.symbol_id == *symbol),
symbol.0 == 0
);
}
if is_terminal {
_terminal_count += 1;
if let Some(&symbol_idx) = symbol_to_index.get(symbol) {
let state_idx = from_state.0 as usize;
if state_idx < action_table.len() && symbol_idx < action_table[state_idx].len() {
let new_action = Action::Shift(*to_state);
if state_idx == 0 {
debug_trace!(
"DEBUG: Adding shift action to state 0: symbol {} (idx={}) -> state {}",
symbol.0,
symbol_idx,
to_state.0
);
}
add_action_with_conflict(
&mut action_table,
&mut conflicts_by_state,
state_idx,
symbol_idx,
new_action,
);
} else if state_idx == 0 {
debug_trace!(
"DEBUG: SKIPPING shift for state 0: bounds check failed - state_idx={}, symbol_idx={}, action_table.len={}, inner_len={}",
state_idx,
symbol_idx,
action_table.len(),
if state_idx < action_table.len() {
action_table[state_idx].len()
} else {
0
}
);
}
} else if from_state.0 == 0 {
debug_trace!(
"DEBUG: Terminal {} not in symbol_to_index for state 0",
symbol.0
);
}
} else {
_non_terminal_count += 1;
}
}
for state_idx in 0..state_count {
for extra_symbol_id in &augmented_grammar.extras {
if let Some(&symbol_idx) = symbol_to_index.get(extra_symbol_id) {
if action_table[state_idx][symbol_idx].is_empty() {
action_table[state_idx][symbol_idx]
.push(Action::Shift(StateId(state_idx as u16)));
}
}
}
}
for item_set in &collection.sets {
let state_idx = item_set.id.0 as usize;
for item in &item_set.items {
if item.is_reduce_item(&augmented_grammar) {
if let Some(rule) = augmented_grammar
.all_rules()
.find(|r| r.production_id.0 == item.rule_id.0)
&& rule.lhs == augmented_start
{
if item.lookahead == eof_symbol {
if let Some(&eof_idx) = symbol_to_index.get(&eof_symbol) {
add_action_with_conflict(
&mut action_table,
&mut conflicts_by_state,
state_idx,
eof_idx,
Action::Accept,
);
}
}
continue;
}
if let Some(&rule_id) = production_to_rule_id.get(&item.rule_id.0) {
let rule = &rules[rule_id as usize];
let is_empty_production = rule.rhs_len == 0;
let lookaheads_to_check: Vec<SymbolId> = if is_empty_production {
if let Some(follow_set) = first_follow.follow(rule.lhs) {
follow_set
.ones()
.map(|idx| map_follow_symbol(SymbolId(idx as u16), eof_symbol))
.collect()
} else {
vec![item.lookahead]
}
} else {
vec![item.lookahead]
};
for lookahead in lookaheads_to_check {
if let Some(&lookahead_idx) = symbol_to_index.get(&lookahead) {
let new_action = Action::Reduce(RuleId(rule_id));
add_action_with_conflict(
&mut action_table,
&mut conflicts_by_state,
state_idx,
lookahead_idx,
new_action,
);
}
}
}
}
}
}
let production_count = augmented_grammar.all_rules().count() as u32;
let token_count = (internal_tokens.len() + 1) as u32;
let prec_tables = build_prec_tables(
&augmented_grammar,
&symbol_to_index,
token_count,
production_count,
);
let first_nonterminal_idx = internal_tokens.len() + ext_tokens.len() + 1;
for ((state_idx, symbol_idx), _actions) in conflicts_by_state {
debug_assert!(state_idx < action_table.len(), "state_idx out of bounds");
debug_assert!(
symbol_idx < action_table[0].len(),
"symbol_idx out of bounds"
);
if symbol_idx >= first_nonterminal_idx {
continue; }
let cell = &mut action_table[state_idx][symbol_idx];
if cell.is_empty() {
continue;
}
if cell.iter().any(|a| matches!(a, Action::Accept)) {
*cell = vec![Action::Accept];
continue;
}
let first_shift = cell.iter().find_map(|a| {
if let Action::Shift(s) = a {
Some(*s)
} else {
None
}
});
let mut reduces: Vec<u16> = cell
.iter()
.filter_map(|a| {
if let Action::Reduce(pid) = a {
Some(pid.0)
} else {
None
}
})
.collect();
if reduces.len() > 1 {
let winner = reduces[1..].iter().fold(reduces[0], |acc, &r| {
decide_reduce_reduce(acc, r, &prec_tables)
});
reduces.clear();
reduces.push(winner);
cell.retain(|a| {
matches!(a, Action::Shift(_)) || matches!(a, Action::Reduce(pid) if pid.0 == winner)
});
}
if let (Some(s), Some(r)) = (first_shift, reduces.first().copied()) {
match decide_with_precedence(symbol_idx, r, &prec_tables) {
PrecDecision::PreferShift => *cell = vec![Action::Shift(s)],
PrecDecision::PreferReduce => *cell = vec![Action::Reduce(RuleId(r))],
PrecDecision::Error => {
}
PrecDecision::NoInfo => {
}
}
}
}
for ((from_state, symbol), _to_state) in &collection.goto_table {
let is_terminal = collection
.symbol_is_terminal
.get(symbol)
.copied()
.unwrap_or(*symbol == eof_symbol);
if !is_terminal && let Some(&symbol_idx) = symbol_to_index.get(symbol) {
let state_idx = from_state.0 as usize;
if state_idx < goto_table.len() && symbol_idx < goto_table[state_idx].len() {
}
}
}
for ((from_state, symbol), to_state) in &collection.goto_table {
let from_idx = from_state.0 as usize;
if let Some(&symbol_idx) = symbol_to_index.get(symbol) {
goto_table[from_idx][symbol_idx] = *to_state;
}
}
if let Some(_start_symbol) = grammar.start_symbol() {
for (state_idx, _item_set) in collection.sets.iter().enumerate() {
let needs_eof_reduce = false;
let reduce_rule_id: Option<RuleId> = None;
if needs_eof_reduce
&& let Some(rule_id) = reduce_rule_id
&& let Some(&eof_idx) = symbol_to_index.get(&SymbolId(0))
{
if action_table[state_idx][eof_idx].is_empty() {
action_table[state_idx][eof_idx].push(Action::Reduce(rule_id));
}
}
}
}
let mut symbol_metadata = Vec::new();
for (symbol_id, token) in &grammar.tokens {
symbol_metadata.push(SymbolMetadata {
name: token.name.clone(),
is_visible: !token.name.starts_with('_'),
is_named: !matches!(&token.pattern, TokenPattern::String(_)),
is_supertype: false,
is_terminal: true,
is_extra: grammar.extras.contains(symbol_id),
is_fragile: false, symbol_id: *symbol_id,
});
}
for symbol_id in grammar.rules.keys() {
let is_supertype = grammar.supertypes.contains(symbol_id);
symbol_metadata.push(SymbolMetadata {
name: format!("rule_{}", symbol_id.0),
is_visible: true,
is_named: true,
is_supertype,
is_terminal: false,
is_extra: false, is_fragile: false, symbol_id: *symbol_id,
});
}
for external in &grammar.externals {
symbol_metadata.push(SymbolMetadata {
name: external.name.clone(),
is_visible: !external.name.starts_with('_'),
is_named: true,
is_supertype: false,
is_terminal: true, is_extra: false, is_fragile: false, symbol_id: external.symbol_id, });
}
let mut external_scanner_states =
vec![vec![false; augmented_grammar.externals.len()]; state_count];
let mut external_symbol_to_idx = BTreeMap::new();
for (idx, external) in augmented_grammar.externals.iter().enumerate() {
external_symbol_to_idx.insert(external.symbol_id, idx);
}
for state_idx in 0..state_count {
for (external_idx, external) in augmented_grammar.externals.iter().enumerate() {
if let Some(&symbol_idx) = symbol_to_index.get(&external.symbol_id) {
if action_table[state_idx][symbol_idx]
.iter()
.any(|a| matches!(a, Action::Shift(_)))
{
external_scanner_states[state_idx][external_idx] = true;
}
}
}
}
let mut nonterminal_to_index = BTreeMap::new();
for (&symbol_id, &idx) in &symbol_to_index {
if nonterminal_symbols.contains(&symbol_id) {
nonterminal_to_index.insert(symbol_id, idx);
}
}
let _rule_count = rules.len();
let token_count = internal_tokens.len() + 1;
let external_token_count = ext_tokens.len();
normalize_action_table(&mut action_table);
let mut index_to_symbol = vec![SymbolId(u16::MAX); symbol_to_index.len()];
for (sym, &idx) in &symbol_to_index {
index_to_symbol[idx] = *sym;
}
let mut table = ParseTable {
action_table,
goto_table,
symbol_metadata,
state_count,
symbol_count,
symbol_to_index,
index_to_symbol,
external_scanner_states,
rules,
nonterminal_to_index,
goto_indexing: GotoIndexing::NonterminalMap, eof_symbol,
start_symbol: original_start,
grammar: grammar.clone(),
initial_state: StateId(0), token_count,
external_token_count,
lex_modes: vec![
LexMode {
lex_state: 0,
external_lex_state: 0
};
state_count
],
extras: vec![], dynamic_prec_by_rule, rule_assoc_by_rule, alias_sequences: vec![], field_names: vec![], field_map: BTreeMap::new(), };
table.detect_goto_indexing();
Ok(table)
}
#[must_use = "validation result must be checked"]
pub fn sanity_check_tables(pt: &ParseTable) -> Result<(), String> {
let eof_col = pt
.symbol_to_index
.get(&pt.eof_symbol)
.ok_or_else(|| format!("EOF symbol {} not in symbol_to_index", pt.eof_symbol.0))?;
let accept_somewhere = pt.action_table.iter().any(|row| {
row.get(*eof_col)
.and_then(|cell| cell.iter().find(|a| matches!(a, Action::Accept)))
.is_some()
});
if !accept_somewhere {
return Err("No ACCEPT on EOF found in action table".to_string());
}
for pid in 0..pt.rules.len() {
let lhs = pt.rules[pid].lhs;
let lhs_idx = pt
.symbol_to_index
.get(&lhs)
.ok_or_else(|| format!("LHS symbol {} not in symbol_to_index", lhs.0))?;
if *lhs_idx < pt.token_count {
return Err(format!(
"LHS must be a non-terminal column (pid={}, lhs_idx={}, token_count={})",
pid, lhs_idx, pt.token_count
));
}
let any = pt
.goto_table
.iter()
.any(|row| row.get(*lhs_idx).is_some_and(|s| s.0 != 0));
if !any {
return Err(format!("No goto(state, lhs(pid={})) present", pid));
}
}
for (sym, &idx) in &pt.symbol_to_index {
if idx >= pt.index_to_symbol.len() {
return Err(format!(
"symbol_to_index has index {} but index_to_symbol has length {}",
idx,
pt.index_to_symbol.len()
));
}
if pt.index_to_symbol[idx] != *sym {
return Err(format!(
"index_to_symbol[{}] = {} but should be {}",
idx, pt.index_to_symbol[idx].0, sym.0
));
}
}
Ok(())
}
fn add_action_with_conflict(
action_table: &mut Vec<Vec<ActionCell>>,
conflicts_by_state: &mut BTreeMap<(usize, usize), Vec<Action>>,
state_idx: usize,
symbol_idx: usize,
new_action: Action,
) {
if state_idx >= action_table.len() || symbol_idx >= action_table[0].len() {
panic!(
"Index out of bounds in add_action_with_conflict: state_idx={}, symbol_idx={}, table_size={}x{}",
state_idx,
symbol_idx,
action_table.len(),
if action_table.is_empty() {
0
} else {
action_table[0].len()
}
);
}
let current_cell = &mut action_table[state_idx][symbol_idx];
if !current_cell.iter().any(|a| action_eq(a, &new_action)) {
current_cell.push(new_action.clone());
if current_cell.len() > 1 {
let entry = conflicts_by_state
.entry((state_idx, symbol_idx))
.or_default();
*entry = current_cell.clone();
}
}
}
pub fn build_lr1_automaton_res(
grammar: &Grammar,
first_follow: &FirstFollowSets,
) -> GlrResult<ParseTable> {
build_lr1_automaton(grammar, first_follow)
}
fn action_eq(a: &Action, b: &Action) -> bool {
match (a, b) {
(Action::Shift(s1), Action::Shift(s2)) => s1 == s2,
(Action::Reduce(r1), Action::Reduce(r2)) => r1 == r2,
(Action::Accept, Action::Accept) => true,
(Action::Error, Action::Error) => true,
(Action::Fork(a1), Action::Fork(a2)) => {
a1.len() == a2.len() && a1.iter().zip(a2).all(|(x, y)| action_eq(x, y))
}
_ => false,
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_lr_item_creation() {
let item = LRItem::new(RuleId(1), 2, SymbolId(3));
assert_eq!(item.rule_id, RuleId(1));
assert_eq!(item.position, 2);
assert_eq!(item.lookahead, SymbolId(3));
}
#[test]
fn test_lr_item_equality() {
let item1 = LRItem::new(RuleId(1), 2, SymbolId(3));
let item2 = LRItem::new(RuleId(1), 2, SymbolId(3));
let item3 = LRItem::new(RuleId(1), 3, SymbolId(3));
assert_eq!(item1, item2);
assert_ne!(item1, item3);
let mut set = std::collections::BTreeSet::new();
set.insert(item1.clone());
assert!(set.contains(&item1));
assert!(set.contains(&item2));
assert!(!set.contains(&item3));
}
#[test]
fn test_item_set_creation() {
let mut item_set = ItemSet::new(StateId(0));
let item = LRItem::new(RuleId(1), 0, SymbolId(0));
item_set.add_item(item.clone());
assert_eq!(item_set.id, StateId(0));
assert!(item_set.items.contains(&item));
assert_eq!(item_set.items.len(), 1);
}
#[test]
fn test_item_set_duplicate_items() {
let mut item_set = ItemSet::new(StateId(0));
let item = LRItem::new(RuleId(1), 0, SymbolId(0));
item_set.add_item(item.clone());
item_set.add_item(item.clone());
assert_eq!(item_set.items.len(), 1);
}
#[test]
fn test_first_follow_empty_grammar() {
let grammar = Grammar::new("test".to_string());
let first_follow = FirstFollowSets::compute(&grammar).unwrap();
assert!(first_follow.first.is_empty());
assert!(first_follow.follow.is_empty());
}
#[test]
fn test_first_follow_simple_grammar() {
let mut grammar = Grammar::new("test".to_string());
let rule = Rule {
lhs: SymbolId(0), rhs: vec![Symbol::Terminal(SymbolId(1))], precedence: None,
associativity: None,
fields: vec![],
production_id: ProductionId(0),
};
grammar.rules.entry(SymbolId(0)).or_default().push(rule);
let token = Token {
name: "a".to_string(),
pattern: TokenPattern::String("a".to_string()),
fragile: false,
};
grammar.tokens.insert(SymbolId(1), token);
let first_follow = FirstFollowSets::compute(&grammar).unwrap();
assert!(first_follow.first.contains_key(&SymbolId(0)));
if let Some(first_s) = first_follow.first(SymbolId(0)) {
assert!(first_s.contains(1)); }
assert!(!first_follow.is_nullable(SymbolId(0)));
}
#[test]
fn test_first_follow_nullable_rule() {
let mut grammar = Grammar::new("test".to_string());
let rule = Rule {
lhs: SymbolId(0), rhs: vec![], precedence: None,
associativity: None,
fields: vec![],
production_id: ProductionId(0),
};
grammar.rules.entry(SymbolId(0)).or_default().push(rule);
let first_follow = FirstFollowSets::compute(&grammar).unwrap();
assert!(first_follow.is_nullable(SymbolId(0)));
}
#[test]
fn test_first_of_sequence() {
let mut grammar = Grammar::new("test".to_string());
let token_a = Token {
name: "a".to_string(),
pattern: TokenPattern::String("a".to_string()),
fragile: false,
};
grammar.tokens.insert(SymbolId(1), token_a);
let token_b = Token {
name: "b".to_string(),
pattern: TokenPattern::String("b".to_string()),
fragile: false,
};
grammar.tokens.insert(SymbolId(2), token_b);
let first_follow = FirstFollowSets::compute(&grammar).unwrap();
let sequence = vec![Symbol::Terminal(SymbolId(1)), Symbol::Terminal(SymbolId(2))];
let first_seq = first_follow.first_of_sequence(&sequence).unwrap();
assert!(first_seq.contains(1));
assert!(!first_seq.contains(2));
}
#[test]
fn test_action_types() {
let shift = Action::Shift(StateId(1));
let reduce = Action::Reduce(RuleId(2));
let accept = Action::Accept;
let error = Action::Error;
let fork = Action::Fork(vec![shift.clone(), reduce.clone()]);
match shift {
Action::Shift(StateId(1)) => {}
_ => panic!("Expected shift action"),
}
match reduce {
Action::Reduce(RuleId(2)) => {}
_ => panic!("Expected reduce action"),
}
match accept {
Action::Accept => {}
_ => panic!("Expected accept action"),
}
match error {
Action::Error => {}
_ => panic!("Expected error action"),
}
match fork {
Action::Fork(actions) => {
assert_eq!(actions.len(), 2);
assert_eq!(actions[0], shift);
assert_eq!(actions[1], reduce);
}
_ => panic!("Expected fork action"),
}
}
#[test]
fn test_action_equality() {
let shift1 = Action::Shift(StateId(1));
let shift2 = Action::Shift(StateId(1));
let shift3 = Action::Shift(StateId(2));
assert_eq!(shift1, shift2);
assert_ne!(shift1, shift3);
let reduce1 = Action::Reduce(RuleId(1));
let reduce2 = Action::Reduce(RuleId(1));
assert_eq!(reduce1, reduce2);
assert_ne!(shift1, reduce1);
}
#[test]
fn test_symbol_metadata() {
let metadata = SymbolMetadata {
name: "expression".to_string(),
is_visible: true,
is_named: true,
is_supertype: false,
is_terminal: false,
is_extra: false,
is_fragile: false,
symbol_id: SymbolId(1),
};
assert_eq!(metadata.name, "expression");
assert!(metadata.is_visible);
assert!(metadata.is_named);
assert!(!metadata.is_supertype);
assert!(!metadata.is_terminal);
assert!(!metadata.is_extra);
assert!(!metadata.is_fragile);
assert_eq!(metadata.symbol_id, SymbolId(1));
}
#[test]
fn test_conflict_types() {
let shift_reduce = ConflictType::ShiftReduce;
let reduce_reduce = ConflictType::ReduceReduce;
assert_eq!(shift_reduce, ConflictType::ShiftReduce);
assert_eq!(reduce_reduce, ConflictType::ReduceReduce);
assert_ne!(shift_reduce, reduce_reduce);
}
#[test]
fn test_conflict_creation() {
let conflict = Conflict {
state: StateId(5),
symbol: SymbolId(10),
actions: vec![Action::Shift(StateId(1)), Action::Reduce(RuleId(2))],
conflict_type: ConflictType::ShiftReduce,
};
assert_eq!(conflict.state, StateId(5));
assert_eq!(conflict.symbol, SymbolId(10));
assert_eq!(conflict.actions.len(), 2);
assert_eq!(conflict.conflict_type, ConflictType::ShiftReduce);
}
#[test]
fn test_conflict_resolver_creation() {
let resolver = ConflictResolver { conflicts: vec![] };
assert!(resolver.conflicts.is_empty());
}
#[test]
fn test_parse_table_creation() {
let parse_table = ParseTable {
action_table: vec![vec![vec![Action::Error]; 5]; 3], goto_table: vec![vec![StateId(0); 5]; 3],
symbol_metadata: vec![],
state_count: 3,
symbol_count: 5,
symbol_to_index: BTreeMap::new(),
index_to_symbol: vec![],
external_scanner_states: vec![],
rules: vec![],
nonterminal_to_index: BTreeMap::new(),
goto_indexing: GotoIndexing::NonterminalMap,
eof_symbol: SymbolId(0),
start_symbol: SymbolId(1),
grammar: Grammar::new("test".to_string()),
initial_state: StateId(0),
token_count: 3,
external_token_count: 0,
lex_modes: vec![
LexMode {
lex_state: 0,
external_lex_state: 0
};
3
],
extras: vec![],
dynamic_prec_by_rule: vec![],
rule_assoc_by_rule: vec![],
alias_sequences: vec![],
field_names: vec![],
field_map: BTreeMap::new(),
};
assert_eq!(parse_table.state_count, 3);
assert_eq!(parse_table.symbol_count, 5);
assert_eq!(parse_table.action_table.len(), 3);
assert_eq!(parse_table.goto_table.len(), 3);
assert_eq!(parse_table.action_table[0].len(), 5);
assert_eq!(parse_table.goto_table[0].len(), 5);
}
#[test]
fn test_lr_item_reduce_check() {
let mut grammar = Grammar::new("test".to_string());
let rule = Rule {
lhs: SymbolId(0),
rhs: vec![Symbol::Terminal(SymbolId(1)), Symbol::Terminal(SymbolId(2))],
precedence: None,
associativity: None,
fields: vec![],
production_id: ProductionId(0),
};
grammar.rules.entry(SymbolId(0)).or_default().push(rule);
let item1 = LRItem::new(RuleId(0), 0, SymbolId(0));
assert!(!item1.is_reduce_item(&grammar));
let item2 = LRItem::new(RuleId(0), 1, SymbolId(0));
assert!(!item2.is_reduce_item(&grammar));
let item3 = LRItem::new(RuleId(0), 2, SymbolId(0));
assert!(item3.is_reduce_item(&grammar));
}
#[test]
fn test_lr_item_next_symbol() {
let mut grammar = Grammar::new("test".to_string());
let rule = Rule {
lhs: SymbolId(0),
rhs: vec![Symbol::Terminal(SymbolId(1)), Symbol::Terminal(SymbolId(2))],
precedence: None,
associativity: None,
fields: vec![],
production_id: ProductionId(0),
};
grammar.rules.entry(SymbolId(0)).or_default().push(rule);
let item1 = LRItem::new(RuleId(0), 0, SymbolId(0));
if let Some(symbol) = item1.next_symbol(&grammar) {
match symbol {
Symbol::Terminal(SymbolId(1)) => {}
_ => panic!("Expected terminal symbol with id 1"),
}
} else {
panic!("Expected next symbol");
}
let item2 = LRItem::new(RuleId(0), 1, SymbolId(0));
if let Some(symbol) = item2.next_symbol(&grammar) {
match symbol {
Symbol::Terminal(SymbolId(2)) => {}
_ => panic!("Expected terminal symbol with id 2"),
}
} else {
panic!("Expected next symbol");
}
let item3 = LRItem::new(RuleId(0), 2, SymbolId(0));
assert!(item3.next_symbol(&grammar).is_none());
}
#[test]
fn test_item_set_collection_creation() {
let collection = ItemSetCollection {
sets: vec![],
goto_table: IndexMap::new(),
symbol_is_terminal: IndexMap::new(),
};
assert!(collection.sets.is_empty());
assert!(collection.goto_table.is_empty());
}
#[test]
fn test_glr_error_types() {
let grammar_error = GLRError::GrammarError(GrammarError::InvalidFieldOrdering);
let conflict_error = GLRError::ConflictResolution("Test conflict".to_string());
let state_error = GLRError::StateMachine("Test state machine error".to_string());
match grammar_error {
GLRError::GrammarError(_) => {}
_ => panic!("Expected grammar error"),
}
match conflict_error {
GLRError::ConflictResolution(msg) => assert_eq!(msg, "Test conflict"),
_ => panic!("Expected conflict resolution error"),
}
match state_error {
GLRError::StateMachine(msg) => assert_eq!(msg, "Test state machine error"),
_ => panic!("Expected state machine error"),
}
}
#[test]
fn test_item_set_equality() {
let mut set1 = ItemSet::new(StateId(0));
let mut set2 = ItemSet::new(StateId(1));
let item1 = LRItem::new(RuleId(1), 0, SymbolId(0));
let item2 = LRItem::new(RuleId(2), 1, SymbolId(1));
set1.add_item(item1.clone());
set1.add_item(item2.clone());
set2.add_item(item1);
set2.add_item(item2);
assert_eq!(set1.items, set2.items);
assert_ne!(set1.id, set2.id);
}
#[test]
fn test_recursive_fork_normalization() {
let mut action = Action::Fork(vec![
Action::Fork(vec![
Action::Reduce(RuleId(3)),
Action::Shift(StateId(2)),
Action::Reduce(RuleId(1)),
]),
Action::Shift(StateId(1)),
Action::Fork(vec![
Action::Accept,
Action::Shift(StateId(4)),
Action::Error,
]),
]);
normalize_action(&mut action);
if let Action::Fork(ref actions) = action {
if let Action::Fork(ref inner) = actions[0] {
assert_eq!(inner.len(), 3);
assert!(matches!(inner[0], Action::Shift(StateId(2))));
assert!(matches!(inner[1], Action::Reduce(RuleId(1))));
assert!(matches!(inner[2], Action::Reduce(RuleId(3))));
}
if let Action::Fork(ref inner) = actions[2] {
assert_eq!(inner.len(), 3);
assert!(matches!(inner[0], Action::Shift(StateId(4))));
assert!(matches!(inner[1], Action::Accept));
assert!(matches!(inner[2], Action::Error));
}
} else {
panic!("Expected Fork action");
}
}
}