Skip to main content

adze_glr_core/
lib.rs

1// GLR core may need unsafe for performance-critical parser algorithms
2#![forbid(unsafe_op_in_unsafe_fn)]
3#![deny(private_interfaces)]
4#![cfg_attr(feature = "strict_api", deny(unreachable_pub))]
5#![cfg_attr(not(feature = "strict_api"), warn(unreachable_pub))]
6#![cfg_attr(feature = "strict_docs", warn(missing_docs))]
7#![cfg_attr(not(feature = "strict_docs"), allow(missing_docs))]
8// Keep surface stable without big refactors:
9#![allow(
10    clippy::ptr_arg,
11    clippy::explicit_counter_loop,
12    clippy::needless_range_loop,
13    clippy::unused_enumerate_index
14)]
15
16//! GLR parser generation algorithms for Adze
17//! This module implements the core GLR state machine generation and conflict resolution
18//!
19//! ## Contracts & Invariants
20//!
21//! This crate maintains several critical invariants for correct parsing:
22//!
23//! ### EOF Symbol Invariants
24//! - EOF symbol must be a terminal sentinel id at or beyond the terminal boundary
25//!   (`token_count + external_token_count`).
26//! - EOF symbol must not be the internal ERROR sentinel
27//!   (`parse_forest::ERROR_SYMBOL`, currently 0xFFFF).
28//! - EOF symbol is always present in the symbol_to_index mapping
29//! - EOF column actions are byte-for-byte copies of the TS "end" column,
30//!   guaranteeing per-state equality.
31//!
32//! ### Error Recovery Invariants
33//! - `has_error`: true if any error chunks exist in the parse forest
34//! - `missing`: count of unique missing terminal symbols inserted
35//! - `cost`: total error recovery cost (insertions + deletions)
36//! - No double counting: each missing symbol counted exactly once
37//! - Extras (whitespace/comments) are never inserted during recovery
38//!
39//! ### Table Normalization
40//! - Action cells are sorted deterministically by action type and value
41//! - Duplicate actions are removed from cells
42//! - Action ordering: Shift < Reduce < Accept < Error < Recover < Fork
43//!
44//! ### API Stability
45//! - `ForestView` trait is sealed and cannot be implemented outside this crate
46//! - `Action` enum is marked `#[non_exhaustive]` for future extensibility
47//! - Test-only APIs are gated behind `test-helpers` feature
48//!
49//! ### Validation
50//! Enable the `strict-invariants` feature to validate parse tables at runtime.
51//! This adds overhead but catches invariant violations early in development.
52
53use adze_ir::*;
54use fixedbitset::FixedBitSet;
55use indexmap::IndexMap;
56use serde::{Deserialize, Serialize};
57use std::collections::{BTreeMap, BTreeSet};
58
59/// Error types and Result alias for GLR operations.
60pub mod error;
61/// Convenience result alias for GLR operations.
62pub use error::Result as GlrResult;
63
64/// Back-compat alias: prefer `GlrError`; `GLRError` remains for now.
65pub use GLRError as GlrError;
66
67/// Conflict inspection API for analyzing GLR parse table conflicts
68pub mod conflict_inspection;
69
70// Re-export key types from adze-ir for API consumers
71/// Re-exported IR types used throughout GLR construction.
72pub use adze_ir::{Grammar, RuleId, StateId, SymbolId};
73
74/// Stable imports for downstream users during 0.8.0-dev.
75pub mod prelude {
76    pub use crate::{FirstFollowSets, ParseTable, build_lr1_automaton};
77}
78
79// Keep available, but don't promise public docs yet:
80#[doc(hidden)]
81pub mod advanced_conflict;
82#[doc(hidden)]
83pub mod conflict_resolution;
84#[doc(hidden)]
85pub mod conflict_visualizer;
86#[doc(hidden)]
87pub mod disambiguation;
88#[doc(hidden)]
89pub mod gss;
90#[doc(hidden)]
91pub mod gss_arena;
92#[doc(hidden)]
93pub mod parse_forest;
94
95pub mod driver;
96pub mod forest_view;
97pub mod stack;
98/// Telemetry counters for tracking GLR parser operations.
99pub mod telemetry;
100/// Tree-sitter compatible lexer interface for GLR parsing.
101pub mod ts_lexer;
102
103/// ParseTable serialization for GLR mode
104#[cfg(feature = "serialization")]
105pub mod serialization;
106
107// Trace macro for debugging GLR conflicts and decisions
108/// Internal tracing macro used by the GLR runtime in debug/test builds.
109#[cfg(any(feature = "glr_trace", feature = "debug_glr"))]
110#[macro_export]
111macro_rules! debug_trace {
112    ($($t:tt)*) => { eprintln!("[GLR] {}", format!($($t)*)); }
113}
114#[cfg(not(any(feature = "glr_trace", feature = "debug_glr")))]
115#[macro_export]
116macro_rules! debug_trace {
117    ($($t:tt)*) => {};
118}
119
120/// Backward-compatible trace macro.
121#[cfg(any(feature = "glr_trace", feature = "debug_glr"))]
122#[macro_export]
123macro_rules! glr_trace {
124    ($($t:tt)*) => { debug_trace!($($t)*); }
125}
126#[cfg(not(any(feature = "glr_trace", feature = "debug_glr")))]
127#[macro_export]
128macro_rules! glr_trace {
129    ($($t:tt)*) => { debug_trace!($($t)*); }
130}
131
132#[doc(hidden)]
133pub mod perf_optimizations;
134#[doc(hidden)]
135pub mod precedence_compare;
136#[doc(hidden)]
137pub mod symbol_comparison;
138#[doc(hidden)]
139pub mod version_info;
140
141pub mod lib_v2;
142
143#[cfg(any(test, feature = "test-api"))]
144/// Utilities for constructing test parse tables and grammars.
145pub mod test_helpers;
146
147#[cfg(test)]
148/// Simple symbol allocator used in tests.
149pub mod test_symbol_alloc;
150
151#[doc(hidden)]
152pub use advanced_conflict::{
153    ConflictAnalyzer, ConflictStats, PrecedenceDecision, PrecedenceResolver,
154};
155#[doc(hidden)]
156pub use conflict_resolution::{RuntimeConflictResolver, VecWrapperResolver};
157#[doc(hidden)]
158pub use conflict_visualizer::{ConflictVisualizer, generate_dot_graph};
159#[doc(hidden)]
160pub use gss::{GSSStats, GraphStructuredStack, StackNode};
161#[doc(hidden)]
162pub use parse_forest::{ForestNode, ParseError, ParseForest, ParseNode, ParseTree};
163#[doc(hidden)]
164pub use perf_optimizations::{ParseTableCache, PerfStats, StackDeduplicator, StackPool};
165#[doc(hidden)]
166pub use precedence_compare::{
167    PrecedenceComparison, PrecedenceInfo, StaticPrecedenceResolver, compare_precedences,
168};
169#[doc(hidden)]
170pub use symbol_comparison::{compare_symbols, compare_versions_with_symbols};
171#[doc(hidden)]
172pub use version_info::{CompareResult, VersionInfo, compare_versions};
173
174// ============================================================================
175// EOF Symbol Sentinel Handling
176// ============================================================================
177//
178// FirstFollowSets uses SymbolId(0) as an internal sentinel to represent EOF
179// in FIRST/FOLLOW set computations. However, the actual EOF symbol in the
180// parse table is dynamically assigned to avoid conflicts with grammar symbols.
181//
182// Use these helpers at the boundary where FIRST/FOLLOW results become real
183// lookahead symbols in the parse table.
184
185/// Internal EOF sentinel used by FirstFollowSets.
186/// This is NOT the actual EOF symbol - use `parse_table.eof_symbol` for that.
187const EOF_SENTINEL: SymbolId = SymbolId(0);
188
189/// Map a symbol from FOLLOW set output to actual parse table symbol.
190/// Replaces the EOF sentinel (SymbolId(0)) with the actual EOF symbol.
191#[inline]
192fn map_follow_symbol(sym: SymbolId, eof_symbol: SymbolId) -> SymbolId {
193    if sym == EOF_SENTINEL { eof_symbol } else { sym }
194}
195
196// Precedence resolution structures
197#[derive(Copy, Clone, Debug, Eq, PartialEq)]
198enum Assoc {
199    Left,
200    Right,
201    None,
202}
203
204#[derive(Copy, Clone, Debug)]
205struct TokPrec {
206    prec: u8,
207    assoc: Assoc,
208}
209
210#[derive(Copy, Clone, Debug)]
211struct RulePrec {
212    prec: u8,
213    assoc: Assoc,
214}
215
216struct PrecTables {
217    // table-indexed; entries 0..token_count-1 may be Some(..); others None
218    tok_prec_by_index: Vec<Option<TokPrec>>,
219    // production_id -> precedence and associativity
220    rule_prec: Vec<RulePrec>,
221}
222
223fn build_prec_tables(
224    grammar: &Grammar,
225    symbol_to_index: &BTreeMap<SymbolId, usize>,
226    token_count: u32,
227    production_count: u32,
228) -> PrecTables {
229    use adze_ir::{Associativity, PrecedenceKind};
230
231    // Guard rail: ensure we have the right table structure
232    // Note: token_count can be 0 for empty grammars (only EOF token)
233    debug_assert!(production_count > 0, "production_count must be positive");
234
235    // token precedence by table index
236    let mut tok_prec_by_index = vec![None; symbol_to_index.len()];
237    let tok_prec_len = tok_prec_by_index.len(); // Capture length before closure
238
239    // Helper: set token precedence preferring higher numeric level
240    let mut set_tok_prec = |tok_idx: usize, new: TokPrec| {
241        if tok_idx >= tok_prec_by_index.len() {
242            return; // Guard against out-of-bounds
243        }
244        tok_prec_by_index[tok_idx] = match tok_prec_by_index[tok_idx] {
245            None => Some(new),
246            Some(old) => Some(if new.prec > old.prec { new } else { old }),
247        };
248    };
249
250    // production precedence: explicit if present, else rightmost-terminal precedence
251    let mut rule_prec = vec![
252        RulePrec {
253            prec: 0,
254            assoc: Assoc::None
255        };
256        production_count as usize
257    ];
258
259    // Two-pass approach: first collect rule precedence, then derive token precedence
260    for rules in grammar.rules.values() {
261        for rule in rules {
262            let pid = rule.production_id.0 as usize;
263            // Skip invalid production IDs (e.g., u16::MAX used as sentinel)
264            if pid >= production_count as usize {
265                continue;
266            }
267
268            // 1) Rule precedence (explicit)
269            let explicit = rule.precedence.and_then(|p| {
270                if let PrecedenceKind::Static(level) = p {
271                    Some(level as u8)
272                } else {
273                    None
274                }
275            });
276
277            // Get rule associativity (defaults to None if not specified)
278            let rule_assoc = rule
279                .associativity
280                .map(|assoc| match assoc {
281                    Associativity::Left => Assoc::Left,
282                    Associativity::Right => Assoc::Right,
283                    Associativity::None => Assoc::None,
284                })
285                .unwrap_or(Assoc::None);
286
287            // 2) Derive token precedence from the rightmost terminal if this rule carries
288            //    an explicit precedence (+ associativity) attribute
289            if let Some(level) = explicit {
290                // Find rightmost terminal in RHS
291                let tok_idx_opt = rule.rhs.iter().rev().find_map(|sym| {
292                    if let Symbol::Terminal(id) = sym {
293                        symbol_to_index.get(id).copied()
294                    } else {
295                        None
296                    }
297                });
298
299                if let Some(tok_idx) = tok_idx_opt
300                    && tok_idx < tok_prec_len
301                {
302                    set_tok_prec(
303                        tok_idx,
304                        TokPrec {
305                            prec: level,
306                            assoc: rule_assoc,
307                        },
308                    );
309                }
310            }
311
312            // 3) Store the rule precedence AND associativity
313            rule_prec[pid] = RulePrec {
314                prec: explicit.unwrap_or(0),
315                assoc: rule_assoc,
316            };
317        }
318    }
319
320    // Second pass: for rules without explicit precedence, inherit from rightmost terminal
321    for rules in grammar.rules.values() {
322        for rule in rules {
323            let pid = rule.production_id.0 as usize;
324            if pid >= production_count as usize {
325                continue;
326            }
327
328            // Skip if already has precedence
329            if rule_prec[pid].prec > 0 {
330                continue;
331            }
332
333            // Inherit from rightmost terminal token precedence AND associativity
334            let derived = rule
335                .rhs
336                .iter()
337                .rev()
338                .find_map(|sym| {
339                    if let Symbol::Terminal(id) = sym {
340                        symbol_to_index.get(id).and_then(|&idx| {
341                            if (idx as u32) < token_count {
342                                tok_prec_by_index[idx]
343                            } else {
344                                None
345                            }
346                        })
347                    } else {
348                        None
349                    }
350                })
351                .unwrap_or(TokPrec {
352                    prec: 0,
353                    assoc: Assoc::None,
354                });
355
356            rule_prec[pid] = RulePrec {
357                prec: derived.prec,
358                assoc: derived.assoc,
359            };
360        }
361    }
362
363    PrecTables {
364        tok_prec_by_index,
365        rule_prec,
366    }
367}
368
369#[derive(Copy, Clone, Debug, Eq, PartialEq)]
370enum PrecDecision {
371    PreferShift,
372    PreferReduce,
373    Error,
374    NoInfo,
375}
376
377#[inline]
378fn decide_with_precedence(
379    lookahead_tok_idx: usize, // table-index of token
380    reduce_prod_id: u16,      // production id from Action::Reduce
381    prec: &PrecTables,
382) -> PrecDecision {
383    // Guard rail: production ID must be valid
384    if reduce_prod_id as usize >= prec.rule_prec.len() {
385        return PrecDecision::NoInfo;
386    }
387
388    let tokp = match prec
389        .tok_prec_by_index
390        .get(lookahead_tok_idx)
391        .and_then(|o| *o)
392    {
393        Some(p) => p,
394        None => return PrecDecision::NoInfo,
395    };
396    let rulep = prec.rule_prec[reduce_prod_id as usize];
397
398    // If either has no precedence info (0), can't decide
399    if tokp.prec == 0 || rulep.prec == 0 {
400        return PrecDecision::NoInfo;
401    }
402
403    use core::cmp::Ordering::*;
404    // FIX: Use RULE's associativity, not token's!
405    // When precedences are equal, the rule's associativity determines shift vs reduce
406    match (tokp.prec.cmp(&rulep.prec), rulep.assoc) {
407        (Greater, _) => PrecDecision::PreferShift,
408        (Less, _) => PrecDecision::PreferReduce,
409        (Equal, Assoc::Left) => PrecDecision::PreferReduce, // left-assoc: reduce first
410        (Equal, Assoc::Right) => PrecDecision::PreferShift, // right-assoc: shift first
411        (Equal, Assoc::None) => PrecDecision::Error,
412    }
413}
414
415// Handle reduce/reduce conflicts (prefer higher rule precedence, tie -> lowest pid)
416#[inline]
417fn decide_reduce_reduce(a: u16, b: u16, prec: &PrecTables) -> u16 {
418    let pa = prec.rule_prec.get(a as usize).map(|r| r.prec).unwrap_or(0);
419    let pb = prec.rule_prec.get(b as usize).map(|r| r.prec).unwrap_or(0);
420    if pa > pb {
421        a
422    } else if pb > pa {
423        b
424    } else {
425        a.min(b)
426    }
427}
428
429// Public API exports
430/// The main GLR parser driver.
431pub use driver::Driver;
432/// Core parse forest types and views.
433pub use forest_view::{Forest, ForestView, Span};
434
435/// Internal performance counters (diagnostics only).
436#[cfg(feature = "perf_counters")]
437#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
438pub mod perf {
439    use std::sync::atomic::{AtomicU64, Ordering};
440
441    /// Snapshot of performance counter values.
442    #[derive(Clone, Debug, Default)]
443    pub struct Counters {
444        /// Number of shift operations.
445        pub shifts: u64,
446        /// Number of reduce operations.
447        pub reductions: u64,
448        /// Number of parser forks.
449        pub forks: u64,
450        /// Number of stack merges.
451        pub merges: u64,
452    }
453
454    static SHIFTS: AtomicU64 = AtomicU64::new(0);
455    static REDUCTIONS: AtomicU64 = AtomicU64::new(0);
456    static FORKS: AtomicU64 = AtomicU64::new(0);
457    static MERGES: AtomicU64 = AtomicU64::new(0);
458
459    /// Increment the shift counter by `n`.
460    #[inline]
461    pub fn inc_shifts(n: u64) {
462        SHIFTS.fetch_add(n, Ordering::Relaxed);
463    }
464
465    /// Increment the reduction counter by `n`.
466    #[inline]
467    pub fn inc_reductions(n: u64) {
468        REDUCTIONS.fetch_add(n, Ordering::Relaxed);
469    }
470
471    /// Increment the fork counter by `n`.
472    #[inline]
473    pub fn inc_forks(n: u64) {
474        FORKS.fetch_add(n, Ordering::Relaxed);
475    }
476
477    /// Increment the merge counter by `n`.
478    #[inline]
479    pub fn inc_merges(n: u64) {
480        MERGES.fetch_add(n, Ordering::Relaxed);
481    }
482
483    /// Take a snapshot of the current counter values.
484    pub fn snapshot() -> Counters {
485        Counters {
486            shifts: SHIFTS.load(Ordering::Relaxed),
487            reductions: REDUCTIONS.load(Ordering::Relaxed),
488            forks: FORKS.load(Ordering::Relaxed),
489            merges: MERGES.load(Ordering::Relaxed),
490        }
491    }
492
493    /// Atomic read-and-clear (consistent snapshot)
494    pub fn take() -> Counters {
495        Counters {
496            shifts: SHIFTS.swap(0, Ordering::Relaxed),
497            reductions: REDUCTIONS.swap(0, Ordering::Relaxed),
498            forks: FORKS.swap(0, Ordering::Relaxed),
499            merges: MERGES.swap(0, Ordering::Relaxed),
500        }
501    }
502
503    /// Reset all counters to zero.
504    pub fn reset() {
505        SHIFTS.store(0, Ordering::Relaxed);
506        REDUCTIONS.store(0, Ordering::Relaxed);
507        FORKS.store(0, Ordering::Relaxed);
508        MERGES.store(0, Ordering::Relaxed);
509    }
510}
511
512/// Internal performance counters (diagnostics only).
513#[cfg(not(feature = "perf_counters"))]
514#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
515pub mod perf {
516    /// Snapshot of performance counter values (no-op when disabled).
517    #[derive(Clone, Debug, Default)]
518    pub struct Counters {
519        /// Number of shift operations.
520        pub shifts: u64,
521        /// Number of reduce operations.
522        pub reductions: u64,
523        /// Number of parser forks.
524        pub forks: u64,
525        /// Number of stack merges.
526        pub merges: u64,
527    }
528
529    /// No-op: increment shift counter.
530    #[inline(always)]
531    pub fn inc_shifts(_: u64) {}
532
533    /// No-op: increment reduction counter.
534    #[inline(always)]
535    pub fn inc_reductions(_: u64) {}
536
537    /// No-op: increment fork counter.
538    #[inline(always)]
539    pub fn inc_forks(_: u64) {}
540
541    /// No-op: increment merge counter.
542    #[inline(always)]
543    pub fn inc_merges(_: u64) {}
544
545    /// Returns default (zeroed) counters.
546    #[inline(always)]
547    pub fn snapshot() -> Counters {
548        Counters::default()
549    }
550
551    /// Present even when disabled so benches/tests compile unchanged.
552    #[inline(always)]
553    pub fn take() -> Counters {
554        Counters::default()
555    }
556
557    /// No-op: reset counters.
558    #[inline(always)]
559    pub fn reset() {}
560}
561
562/// FIRST/FOLLOW sets computation for GLR parsing
563#[derive(Debug, Clone)]
564pub struct FirstFollowSets {
565    first: IndexMap<SymbolId, FixedBitSet>,
566    follow: IndexMap<SymbolId, FixedBitSet>,
567    nullable: FixedBitSet,
568    #[allow(dead_code)]
569    symbol_count: usize,
570}
571
572impl FirstFollowSets {
573    fn get_max_symbol_id(symbol: &Symbol) -> u16 {
574        match symbol {
575            Symbol::Terminal(id) | Symbol::NonTerminal(id) | Symbol::External(id) => id.0,
576            Symbol::Optional(inner) | Symbol::Repeat(inner) | Symbol::RepeatOne(inner) => {
577                Self::get_max_symbol_id(inner)
578            }
579            Symbol::Choice(choices) => choices
580                .iter()
581                .map(Self::get_max_symbol_id)
582                .max()
583                .unwrap_or(0),
584            Symbol::Sequence(seq) => seq.iter().map(Self::get_max_symbol_id).max().unwrap_or(0),
585            Symbol::Epsilon => 0,
586        }
587    }
588
589    /// Compute FIRST/FOLLOW sets for the given grammar with automatic normalization.
590    ///
591    /// This method automatically normalizes complex symbols (Repeat, Choice, etc.) before computation.
592    ///
593    /// # Examples
594    ///
595    /// ```
596    /// use adze_glr_core::FirstFollowSets;
597    /// use adze_ir::*;
598    ///
599    /// // Build a tiny grammar: E → a | E '+' E
600    /// let mut grammar = Grammar::new("expr".into());
601    /// let a = SymbolId(1);
602    /// let plus = SymbolId(2);
603    /// let e = SymbolId(10);
604    ///
605    /// grammar.tokens.insert(a, Token { name: "a".into(), pattern: TokenPattern::String("a".into()), fragile: false });
606    /// grammar.tokens.insert(plus, Token { name: "+".into(), pattern: TokenPattern::String("+".into()), fragile: false });
607    /// grammar.rule_names.insert(e, "E".into());
608    /// grammar.rules.insert(e, vec![
609    ///     Rule { lhs: e, rhs: vec![Symbol::Terminal(a)], precedence: None, associativity: None, fields: vec![], production_id: ProductionId(0) },
610    ///     Rule { lhs: e, rhs: vec![Symbol::NonTerminal(e), Symbol::Terminal(plus), Symbol::NonTerminal(e)], precedence: None, associativity: None, fields: vec![], production_id: ProductionId(1) },
611    /// ]);
612    ///
613    /// let ff = FirstFollowSets::compute_normalized(&mut grammar).unwrap();
614    /// // 'a' (SymbolId 1) should be in FIRST(E)
615    /// assert!(ff.first(e).unwrap().contains(a.0 as usize));
616    /// ```
617    #[must_use = "computation result must be checked"]
618    pub fn compute_normalized(grammar: &mut Grammar) -> Result<Self, GLRError> {
619        // Normalize the grammar to convert complex symbols to simple rules
620        grammar.normalize();
621
622        // Now compute FIRST/FOLLOW sets on the normalized grammar
623        Self::compute(grammar)
624    }
625
626    /// Compute FIRST/FOLLOW sets for the given grammar.
627    ///
628    /// # Examples
629    ///
630    /// ```
631    /// use adze_glr_core::FirstFollowSets;
632    /// use adze_ir::*;
633    ///
634    /// let mut grammar = Grammar::new("simple".into());
635    /// let a = SymbolId(1);
636    /// let s = SymbolId(10);
637    ///
638    /// grammar.tokens.insert(a, Token { name: "a".into(), pattern: TokenPattern::String("a".into()), fragile: false });
639    /// grammar.rule_names.insert(s, "S".into());
640    /// grammar.rules.insert(s, vec![
641    ///     Rule { lhs: s, rhs: vec![Symbol::Terminal(a)], precedence: None, associativity: None, fields: vec![], production_id: ProductionId(0) },
642    /// ]);
643    ///
644    /// let ff = FirstFollowSets::compute(&grammar).unwrap();
645    /// assert!(ff.first(s).unwrap().contains(a.0 as usize));
646    /// assert!(!ff.is_nullable(s));
647    /// ```
648    #[must_use = "computation result must be checked"]
649    pub fn compute(grammar: &Grammar) -> Result<Self, GLRError> {
650        // Clone and normalize the grammar if it contains complex symbols
651        let normalized_grammar = {
652            let mut cloned = grammar.clone();
653            let _ = cloned.normalize(); // normalize returns Vec<Rule>, ignore it
654            cloned
655        };
656
657        // Use the normalized grammar for computation
658        let grammar = &normalized_grammar;
659        // Find the maximum symbol ID to determine the size needed
660        let max_rule_id = grammar.rules.keys().map(|id| id.0).max().unwrap_or(0);
661        let max_token_id = grammar.tokens.keys().map(|id| id.0).max().unwrap_or(0);
662        let max_external_id = grammar
663            .externals
664            .iter()
665            .map(|e| e.symbol_id.0)
666            .max()
667            .unwrap_or(0);
668
669        // Also check max symbol ID in all rule RHS
670        let mut max_rhs_id = 0u16;
671        for rules in grammar.rules.values() {
672            for rule in rules {
673                for symbol in &rule.rhs {
674                    max_rhs_id = max_rhs_id.max(Self::get_max_symbol_id(symbol));
675                }
676            }
677        }
678
679        let symbol_count = (max_rule_id
680            .max(max_token_id)
681            .max(max_external_id)
682            .max(max_rhs_id)
683            + 2) as usize; // +2 to leave room for EOF and other potential symbols
684
685        let mut first = IndexMap::new();
686        let mut follow = IndexMap::new();
687        let mut nullable = FixedBitSet::with_capacity(symbol_count);
688
689        // Initialize sets
690        for &symbol_id in grammar.rules.keys().chain(grammar.tokens.keys()) {
691            first.insert(symbol_id, FixedBitSet::with_capacity(symbol_count));
692            follow.insert(symbol_id, FixedBitSet::with_capacity(symbol_count));
693        }
694
695        // Compute FIRST sets
696        let mut changed = true;
697        while changed {
698            changed = false;
699
700            for rule in grammar.all_rules() {
701                let lhs = rule.lhs;
702                let mut rule_nullable = true;
703
704                for symbol in &rule.rhs {
705                    match symbol {
706                        Symbol::Terminal(id) => {
707                            if let Some(first_set) = first.get_mut(&lhs)
708                                && !first_set.contains(id.0 as usize)
709                            {
710                                first_set.insert(id.0 as usize);
711                                changed = true;
712                            }
713                            rule_nullable = false;
714                            break;
715                        }
716                        Symbol::NonTerminal(id) | Symbol::External(id) => {
717                            if let Some(symbol_first) = first.get(id).cloned()
718                                && let Some(lhs_first) = first.get_mut(&lhs)
719                            {
720                                let old_len = lhs_first.count_ones(..);
721                                lhs_first.union_with(&symbol_first);
722                                if lhs_first.count_ones(..) > old_len {
723                                    changed = true;
724                                }
725                            }
726
727                            if !nullable.contains(id.0 as usize) {
728                                rule_nullable = false;
729                                break;
730                            }
731                        }
732                        Symbol::Epsilon => {
733                            // Epsilon doesn't contribute to FIRST set
734                            // but keeps rule nullable
735                        }
736                        Symbol::Optional(_)
737                        | Symbol::Repeat(_)
738                        | Symbol::RepeatOne(_)
739                        | Symbol::Choice(_)
740                        | Symbol::Sequence(_) => {
741                            // These should be normalized before FIRST/FOLLOW computation
742                            return Err(GLRError::ComplexSymbolsNotNormalized {
743                                operation: "FIRST/FOLLOW computation".to_string(),
744                            });
745                        }
746                    }
747                }
748
749                if rule_nullable && !nullable.contains(lhs.0 as usize) {
750                    nullable.insert(lhs.0 as usize);
751                    changed = true;
752                }
753            }
754        }
755
756        // Compute FOLLOW sets
757        // Initialize FOLLOW(start_symbol) with EOF
758        if let Some(start_symbol) = grammar.start_symbol()
759            && let Some(follow_set) = follow.get_mut(&start_symbol)
760        {
761            follow_set.insert(0); // EOF symbol
762        }
763
764        changed = true;
765        while changed {
766            changed = false;
767
768            for rule in grammar.all_rules() {
769                // Special handling for rules of the form A -> A B (left recursion)
770                if rule.rhs.len() >= 2
771                    && let (Symbol::NonTerminal(first_id), Symbol::NonTerminal(second_id)) =
772                        (&rule.rhs[0], &rule.rhs[1])
773                    && *first_id == rule.lhs
774                {
775                    // This is a left-recursive rule like Module_body_vec_contents -> Module_body_vec_contents Statement
776                    // FIRST(Statement) should be in FOLLOW(Module_body_vec_contents)
777                    if let Some(first_of_second) = first.get(second_id)
778                        && let Some(follow_set) = follow.get_mut(&rule.lhs)
779                    {
780                        let old_len = follow_set.count_ones(..);
781                        follow_set.union_with(first_of_second);
782                        if follow_set.count_ones(..) > old_len {
783                            changed = true;
784                        }
785                    }
786                }
787
788                for (i, symbol) in rule.rhs.iter().enumerate() {
789                    if let Symbol::NonTerminal(id) | Symbol::External(id) = symbol {
790                        // Add FIRST of remaining symbols to FOLLOW of current symbol
791                        let remaining = &rule.rhs[i + 1..];
792                        let first_of_remaining =
793                            Self::first_of_sequence_static(remaining, &first, &nullable)?;
794
795                        if let Some(follow_set) = follow.get_mut(id) {
796                            let old_len = follow_set.count_ones(..);
797                            follow_set.union_with(&first_of_remaining);
798                            if follow_set.count_ones(..) > old_len {
799                                changed = true;
800                            }
801                        }
802
803                        // If remaining symbols are nullable, add FOLLOW of LHS
804                        if Self::sequence_is_nullable(remaining, &nullable)
805                            && let Some(lhs_follow) = follow.get(&rule.lhs).cloned()
806                            && let Some(follow_set) = follow.get_mut(id)
807                        {
808                            let old_len = follow_set.count_ones(..);
809                            follow_set.union_with(&lhs_follow);
810                            if follow_set.count_ones(..) > old_len {
811                                changed = true;
812                            }
813                        }
814                    }
815                }
816            }
817        }
818
819        Ok(Self {
820            first,
821            follow,
822            nullable,
823            symbol_count,
824        })
825    }
826
827    /// Get FIRST set of a sequence of symbols
828    #[must_use = "computation result must be checked"]
829    pub fn first_of_sequence(&self, symbols: &[Symbol]) -> Result<FixedBitSet, GLRError> {
830        Self::first_of_sequence_static(symbols, &self.first, &self.nullable)
831    }
832
833    fn first_of_sequence_static(
834        symbols: &[Symbol],
835        first: &IndexMap<SymbolId, FixedBitSet>,
836        nullable: &FixedBitSet,
837    ) -> Result<FixedBitSet, GLRError> {
838        let mut result = FixedBitSet::with_capacity(nullable.len());
839
840        for symbol in symbols {
841            match symbol {
842                Symbol::Terminal(id) => {
843                    result.insert(id.0 as usize);
844                    break;
845                }
846                Symbol::Epsilon => {
847                    // Epsilon doesn't contribute to FIRST set, continue to next symbol
848                }
849                Symbol::NonTerminal(id) | Symbol::External(id) => {
850                    if let Some(symbol_first) = first.get(id) {
851                        result.union_with(symbol_first);
852                    }
853
854                    if !nullable.contains(id.0 as usize) {
855                        break;
856                    }
857                }
858                Symbol::Optional(_)
859                | Symbol::Repeat(_)
860                | Symbol::RepeatOne(_)
861                | Symbol::Choice(_)
862                | Symbol::Sequence(_) => {
863                    return Err(GLRError::ComplexSymbolsNotNormalized {
864                        operation: "FIRST/FOLLOW computation".to_string(),
865                    });
866                }
867            }
868        }
869
870        Ok(result)
871    }
872
873    fn sequence_is_nullable(symbols: &[Symbol], nullable: &FixedBitSet) -> bool {
874        symbols.iter().all(|symbol| match symbol {
875            Symbol::Terminal(_) => false,
876            Symbol::NonTerminal(id) | Symbol::External(id) => nullable.contains(id.0 as usize),
877            Symbol::Epsilon => true,
878            Symbol::Optional(_)
879            | Symbol::Repeat(_)
880            | Symbol::RepeatOne(_)
881            | Symbol::Choice(_)
882            | Symbol::Sequence(_) => {
883                panic!("Complex symbols should be normalized before FIRST/FOLLOW computation");
884            }
885        })
886    }
887
888    /// Get FIRST set for a symbol
889    pub fn first(&self, symbol: SymbolId) -> Option<&FixedBitSet> {
890        self.first.get(&symbol)
891    }
892
893    /// Get FOLLOW set for a symbol
894    pub fn follow(&self, symbol: SymbolId) -> Option<&FixedBitSet> {
895        self.follow.get(&symbol)
896    }
897
898    /// Check if a symbol is nullable
899    pub fn is_nullable(&self, symbol: SymbolId) -> bool {
900        self.nullable.contains(symbol.0 as usize)
901    }
902}
903
904/// LR(1) item for GLR parsing
905#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
906pub struct LRItem {
907    /// Owning rule for this item/state
908    pub rule_id: RuleId,
909    /// Position within the rule's RHS
910    pub position: usize,
911    /// Lookahead symbol for LR(1) parsing
912    pub lookahead: SymbolId,
913}
914
915impl LRItem {
916    /// Construct an `LRItem` from its owning rule, dot position, and lookahead symbol.
917    pub fn new(rule_id: RuleId, position: usize, lookahead: SymbolId) -> Self {
918        Self {
919            rule_id,
920            position,
921            lookahead,
922        }
923    }
924
925    /// Check if this item is at the end of the rule (reduce item)
926    pub fn is_reduce_item(&self, grammar: &Grammar) -> bool {
927        if let Some(rule) = grammar
928            .all_rules()
929            .find(|r| r.production_id.0 == self.rule_id.0)
930        {
931            // Special case: epsilon rules (A -> epsilon) are reduce items at position 0
932            // because epsilon doesn't need to be "consumed" - it represents empty string
933            if rule.rhs.len() == 1 && matches!(rule.rhs[0], Symbol::Epsilon) {
934                return true; // Always a reduce item for epsilon rules
935            }
936
937            self.position >= rule.rhs.len()
938        } else {
939            false
940        }
941    }
942
943    /// Get the symbol after the dot (next symbol to parse)
944    pub fn next_symbol<'a>(&self, grammar: &'a Grammar) -> Option<&'a Symbol> {
945        if let Some(rule) = grammar
946            .all_rules()
947            .find(|r| r.production_id.0 == self.rule_id.0)
948        {
949            rule.rhs.get(self.position)
950        } else {
951            None
952        }
953    }
954}
955
956/// Set of LR(1) items representing a parser state
957#[derive(Debug, Clone, PartialEq, Eq)]
958pub struct ItemSet {
959    /// The LR(1) item set that defines this state's closure
960    pub items: BTreeSet<LRItem>,
961    /// Unique identifier for this state in the canonical collection
962    pub id: StateId,
963}
964
965impl ItemSet {
966    /// Create a new empty item set with the given state ID
967    pub fn new(id: StateId) -> Self {
968        Self {
969            items: BTreeSet::new(),
970            id,
971        }
972    }
973
974    /// Add an LR(1) item to this item set
975    pub fn add_item(&mut self, item: LRItem) {
976        self.items.insert(item);
977    }
978
979    /// Compute closure of this item set
980    pub fn closure(
981        &mut self,
982        grammar: &Grammar,
983        first_follow: &FirstFollowSets,
984    ) -> Result<(), GLRError> {
985        let _initial_size = self.items.len();
986
987        let mut added = true;
988        let mut _iteration = 0;
989        while added {
990            added = false;
991            _iteration += 1;
992            let current_items: Vec<_> = self.items.iter().cloned().collect();
993
994            for item in current_items {
995                if let Some(Symbol::NonTerminal(symbol_id)) = item.next_symbol(grammar) {
996                    // Find all rules with this symbol as LHS
997                    if let Some(rules) = grammar.get_rules_for_symbol(*symbol_id) {
998                        for rule in rules {
999                            // Compute FIRST of β α where β is the rest of the current rule
1000                            // and α is the lookahead
1001                            let mut beta = Vec::new();
1002                            if let Some(current_rule) = grammar
1003                                .all_rules()
1004                                .find(|r| r.production_id.0 == item.rule_id.0)
1005                            {
1006                                beta.extend_from_slice(&current_rule.rhs[item.position + 1..]);
1007                            }
1008                            beta.push(Symbol::Terminal(item.lookahead));
1009
1010                            let first_beta_alpha = first_follow.first_of_sequence(&beta)?;
1011
1012                            // Add new items for each symbol in FIRST(β α)
1013                            for lookahead_idx in first_beta_alpha.ones() {
1014                                let new_item = LRItem::new(
1015                                    RuleId(rule.production_id.0),
1016                                    0,
1017                                    SymbolId(lookahead_idx as u16),
1018                                );
1019
1020                                if !self.items.contains(&new_item) {
1021                                    self.items.insert(new_item);
1022                                    added = true;
1023                                    if rule.rhs.is_empty() {
1024                                        // Empty production
1025                                    }
1026                                }
1027                            }
1028                        }
1029                    }
1030                }
1031            }
1032        }
1033
1034        // Closure complete
1035        Ok(())
1036    }
1037
1038    /// Compute GOTO for a given symbol
1039    pub fn goto(
1040        &self,
1041        symbol: &Symbol,
1042        grammar: &Grammar,
1043        _first_follow: &FirstFollowSets,
1044    ) -> ItemSet {
1045        let mut new_set = ItemSet::new(StateId(0)); // ID will be assigned later
1046
1047        // Add all items where the dot can advance over the given symbol
1048        for item in &self.items {
1049            if let Some(next_sym) = item.next_symbol(grammar)
1050                && std::mem::discriminant(next_sym) == std::mem::discriminant(symbol)
1051            {
1052                match (next_sym, symbol) {
1053                    (Symbol::Terminal(a), Symbol::Terminal(b))
1054                    | (Symbol::NonTerminal(a), Symbol::NonTerminal(b))
1055                    | (Symbol::External(a), Symbol::External(b))
1056                        if a == b =>
1057                    {
1058                        let new_item = LRItem::new(item.rule_id, item.position + 1, item.lookahead);
1059                        new_set.add_item(new_item);
1060                    }
1061                    _ => {}
1062                }
1063            }
1064        }
1065
1066        // Compute closure of the new set
1067        let _ = new_set.closure(grammar, _first_follow);
1068        new_set
1069    }
1070}
1071
1072/// Collection of all LR(1) item sets (parser states)
1073#[derive(Debug, Clone)]
1074#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
1075pub struct ItemSetCollection {
1076    /// All computed LR(1) item sets (parser states).
1077    pub sets: Vec<ItemSet>,
1078    /// GOTO transitions: `(from_state, symbol) -> to_state`.
1079    pub goto_table: IndexMap<(StateId, SymbolId), StateId>,
1080    /// Track which symbols in goto_table are terminals (true) vs non-terminals (false)
1081    pub symbol_is_terminal: IndexMap<SymbolId, bool>,
1082}
1083
1084impl ItemSetCollection {
1085    /// Build canonical collection of LR(1) item sets for augmented grammar
1086    pub fn build_canonical_collection_augmented(
1087        grammar: &Grammar,
1088        first_follow: &FirstFollowSets,
1089        augmented_start: SymbolId,
1090        _original_start: SymbolId,
1091        eof_symbol: SymbolId,
1092    ) -> Self {
1093        let mut collection = Self {
1094            sets: Vec::new(),
1095            goto_table: IndexMap::new(),
1096            symbol_is_terminal: IndexMap::new(),
1097        };
1098
1099        // Create initial state with the augmented start rule S' -> S $
1100        let mut initial_set = ItemSet::new(StateId(0));
1101
1102        // Find the augmented start rule
1103        if let Some(augmented_rules) = grammar.get_rules_for_symbol(augmented_start) {
1104            for rule in augmented_rules {
1105                // Add S' -> • S with lookahead $ (EOF)
1106                let start_item = LRItem::new(
1107                    RuleId(rule.production_id.0),
1108                    0,
1109                    eof_symbol, // EOF symbol
1110                );
1111                initial_set.add_item(start_item);
1112            }
1113        }
1114
1115        // Compute closure
1116        let _ = initial_set.closure(grammar, first_follow);
1117        debug_trace!(
1118            "Initial state 0 after closure has {} items:",
1119            initial_set.items.len()
1120        );
1121
1122        // Track what symbols we expect transitions for
1123        let mut expected_terminals = std::collections::BTreeSet::new();
1124        let mut expected_nonterminals = std::collections::BTreeSet::new();
1125
1126        for item in &initial_set.items {
1127            // Print each item to debug
1128            if let Some(rule) = grammar
1129                .all_rules()
1130                .find(|r| r.production_id.0 == item.rule_id.0)
1131            {
1132                let mut rhs_str = String::new();
1133                for (idx, sym) in rule.rhs.iter().enumerate() {
1134                    if idx == item.position {
1135                        rhs_str.push_str(" • ");
1136                    }
1137                    match sym {
1138                        Symbol::Terminal(id) => rhs_str.push_str(&format!("T({}) ", id.0)),
1139                        Symbol::NonTerminal(id) => rhs_str.push_str(&format!("NT({}) ", id.0)),
1140                        _ => rhs_str.push_str("? "),
1141                    }
1142                }
1143                if item.position == rule.rhs.len() {
1144                    rhs_str.push_str(" • ");
1145                }
1146                debug_trace!(
1147                    "  Item: NT({}) -> {}, lookahead={}",
1148                    rule.lhs.0,
1149                    rhs_str,
1150                    item.lookahead.0
1151                );
1152
1153                // Track what symbol is next
1154                if item.position < rule.rhs.len() {
1155                    match &rule.rhs[item.position] {
1156                        Symbol::Terminal(t) => {
1157                            expected_terminals.insert(*t);
1158                        }
1159                        Symbol::NonTerminal(nt) => {
1160                            expected_nonterminals.insert(*nt);
1161                        }
1162                        _ => {}
1163                    }
1164                }
1165            }
1166        }
1167
1168        debug_trace!("State 0 expects transitions for:");
1169        debug_trace!("  Terminals: {:?}", expected_terminals);
1170        debug_trace!("  Nonterminals: {:?}", expected_nonterminals);
1171
1172        collection.sets.push(initial_set);
1173        let mut state_counter = 1;
1174
1175        // Build all reachable states (same as before)
1176        let mut i = 0;
1177        while i < collection.sets.len() {
1178            let current_set = collection.sets[i].clone();
1179
1180            // Debug: Print all items in this state
1181            for item in &current_set.items {
1182                if let Some(rule) = grammar
1183                    .all_rules()
1184                    .find(|r| r.production_id.0 == item.rule_id.0)
1185                {
1186                    let mut rhs_str = String::new();
1187                    for (idx, sym) in rule.rhs.iter().enumerate() {
1188                        if idx == item.position {
1189                            rhs_str.push_str(" • ");
1190                        }
1191                        rhs_str.push_str(&format!("{:?} ", sym));
1192                    }
1193                    if item.position == rule.rhs.len() {
1194                        rhs_str.push_str(" • ");
1195                    }
1196                    // "  [{}] {:?} -> {} , lookahead={}"
1197                }
1198            }
1199
1200            // Find all symbols that can be shifted from this state
1201            let mut symbols = BTreeSet::new();
1202            let mut _terminal_count = 0;
1203            let mut _non_terminal_count = 0;
1204            if i == 0 {
1205                debug_trace!("\n=== State 0 Analysis ===");
1206                debug_trace!("State 0 has {} items:", current_set.items.len());
1207            }
1208            for (_idx, item) in current_set.items.iter().enumerate() {
1209                if i == 0 {
1210                    // Print the item details
1211                    if let Some(rule) = grammar
1212                        .all_rules()
1213                        .find(|r| r.production_id.0 == item.rule_id.0)
1214                    {
1215                        let mut item_str = String::new();
1216                        item_str.push_str(&format!("NT({}) -> ", rule.lhs.0));
1217                        for (pos, sym) in rule.rhs.iter().enumerate() {
1218                            if pos == item.position {
1219                                item_str.push_str("• ");
1220                            }
1221                            match sym {
1222                                Symbol::Terminal(t) => item_str.push_str(&format!("T({}) ", t.0)),
1223                                Symbol::NonTerminal(nt) => {
1224                                    item_str.push_str(&format!("NT({}) ", nt.0))
1225                                }
1226                                Symbol::External(e) => item_str.push_str(&format!("EXT({}) ", e.0)),
1227                                _ => item_str.push_str(&format!("{:?} ", sym)),
1228                            }
1229                        }
1230                        if item.position == rule.rhs.len() {
1231                            item_str.push_str("• ");
1232                        }
1233                        debug_trace!("  Item {}: {} (rule_id={})", _idx, item_str, item.rule_id.0);
1234                    }
1235                }
1236
1237                if let Some(symbol) = item.next_symbol(grammar) {
1238                    match symbol {
1239                        Symbol::Terminal(_id) => {
1240                            _terminal_count += 1;
1241                        }
1242                        Symbol::NonTerminal(_id) => {
1243                            _non_terminal_count += 1;
1244                        }
1245                        Symbol::External(_id) => {
1246                            _terminal_count += 1; // Count externals as terminals
1247                        }
1248                        _ => {}
1249                    }
1250                    symbols.insert(symbol.clone());
1251                    if i == 0 {
1252                        debug_trace!("    -> next symbol: {:?}", symbol);
1253                    }
1254                }
1255            }
1256
1257            if i == 0 {
1258                debug_trace!("\nState 0 summary:");
1259                debug_trace!("  Total symbols that can be shifted: {}", symbols.len());
1260                debug_trace!("  Terminals: {}", _terminal_count);
1261                debug_trace!("  Non-terminals: {}", _non_terminal_count);
1262                debug_trace!("  Symbols: {:?}\n", symbols);
1263            }
1264
1265            // Debug: symbols.len(), _terminal_count, _non_terminal_count
1266            // Compute GOTO for each symbol
1267            for symbol in symbols {
1268                let goto_set = current_set.goto(&symbol, grammar, first_follow);
1269
1270                if !goto_set.items.is_empty() {
1271                    // Check if this set already exists
1272                    let existing_state = collection
1273                        .sets
1274                        .iter()
1275                        .find(|set| set.items == goto_set.items)
1276                        .map(|set| set.id);
1277
1278                    let target_state = if let Some(existing_id) = existing_state {
1279                        existing_id
1280                    } else {
1281                        // Add new state
1282                        let new_id = StateId(state_counter);
1283                        let mut new_set = goto_set;
1284                        new_set.id = new_id;
1285                        collection.sets.push(new_set);
1286                        state_counter += 1;
1287                        new_id
1288                    };
1289
1290                    // Add to GOTO table
1291                    let symbol_id = match symbol {
1292                        Symbol::Terminal(id) | Symbol::NonTerminal(id) | Symbol::External(id) => id,
1293                        Symbol::Optional(_)
1294                        | Symbol::Repeat(_)
1295                        | Symbol::RepeatOne(_)
1296                        | Symbol::Choice(_)
1297                        | Symbol::Sequence(_)
1298                        | Symbol::Epsilon => {
1299                            panic!(
1300                                "Complex symbols should be normalized before LR item generation"
1301                            );
1302                        }
1303                    };
1304                    if current_set.id.0 == 0 {
1305                        debug_trace!(
1306                            "  State 0 GOTO: symbol {:?} -> state {}",
1307                            symbol_id,
1308                            target_state.0
1309                        );
1310                    }
1311                    collection
1312                        .goto_table
1313                        .insert((current_set.id, symbol_id), target_state);
1314
1315                    // Track whether this symbol is a terminal or non-terminal
1316                    let is_terminal = matches!(symbol, Symbol::Terminal(_) | Symbol::External(_));
1317                    collection.symbol_is_terminal.insert(symbol_id, is_terminal);
1318                    // "DEBUG: Added goto({}, {}) = {}"
1319                }
1320            }
1321
1322            i += 1;
1323        }
1324
1325        collection
1326    }
1327
1328    /// Build canonical collection of LR(1) item sets.
1329    ///
1330    /// # Examples
1331    ///
1332    /// ```
1333    /// use adze_glr_core::{FirstFollowSets, ItemSetCollection};
1334    /// use adze_ir::*;
1335    ///
1336    /// let mut grammar = Grammar::new("simple".into());
1337    /// let a = SymbolId(1);
1338    /// let s = SymbolId(10);
1339    ///
1340    /// grammar.tokens.insert(a, Token { name: "a".into(), pattern: TokenPattern::String("a".into()), fragile: false });
1341    /// grammar.rule_names.insert(s, "S".into());
1342    /// grammar.rules.insert(s, vec![
1343    ///     Rule { lhs: s, rhs: vec![Symbol::Terminal(a)], precedence: None, associativity: None, fields: vec![], production_id: ProductionId(0) },
1344    /// ]);
1345    ///
1346    /// let ff = FirstFollowSets::compute(&grammar).unwrap();
1347    /// let collection = ItemSetCollection::build_canonical_collection(&grammar, &ff);
1348    /// assert!(!collection.sets.is_empty(), "should have at least one state");
1349    /// ```
1350    pub fn build_canonical_collection(grammar: &Grammar, first_follow: &FirstFollowSets) -> Self {
1351        let mut collection = Self {
1352            sets: Vec::new(),
1353            goto_table: IndexMap::new(),
1354            symbol_is_terminal: IndexMap::new(),
1355        };
1356
1357        // Create initial state with augmented start rule
1358        let mut initial_set = ItemSet::new(StateId(0));
1359
1360        // Find the start symbol (LHS of the first rule in grammar)
1361        if let Some(start_symbol) = grammar.start_symbol() {
1362            // Debug: grammar.rule_names.get(&start_symbol)
1363
1364            // Add items for ALL rules with the start symbol as LHS
1365            if let Some(start_rules) = grammar.get_rules_for_symbol(start_symbol) {
1366                for rule in start_rules.iter() {
1367                    // Debug: idx, rule.lhs, rule.rhs, rule.production_id.0
1368                    let start_item = LRItem::new(
1369                        RuleId(rule.production_id.0),
1370                        0,
1371                        SymbolId(0), // EOF symbol
1372                    );
1373                    initial_set.add_item(start_item);
1374                    // Debug: rule.production_id.0
1375                }
1376            }
1377
1378            // Compute closure
1379            let _ = initial_set.closure(grammar, first_follow);
1380        }
1381
1382        // Only add initial set if it has items
1383        if initial_set.items.is_empty() {
1384            // Handle empty initial set if needed
1385        } else {
1386            for _item in &initial_set.items {
1387                // Debug: item.rule_id.0, item.position, item.lookahead.0
1388            }
1389        }
1390
1391        collection.sets.push(initial_set);
1392        let mut state_counter = 1;
1393
1394        // Build all reachable states
1395        let mut i = 0;
1396        while i < collection.sets.len() {
1397            let current_set = collection.sets[i].clone();
1398
1399            // Debug: Print all items in this state
1400            for item in &current_set.items {
1401                if let Some(rule) = grammar
1402                    .all_rules()
1403                    .find(|r| r.production_id.0 == item.rule_id.0)
1404                {
1405                    let mut rhs_str = String::new();
1406                    for (idx, sym) in rule.rhs.iter().enumerate() {
1407                        if idx == item.position {
1408                            rhs_str.push_str(" • ");
1409                        }
1410                        rhs_str.push_str(&format!("{:?} ", sym));
1411                    }
1412                    if item.position == rule.rhs.len() {
1413                        rhs_str.push_str(" • ");
1414                    }
1415                    // "  [{}] {:?} -> {} , lookahead={}"
1416                }
1417            }
1418
1419            // Find all symbols that can be shifted from this state
1420            let mut symbols = BTreeSet::new();
1421            let mut _terminal_count = 0;
1422            let mut _non_terminal_count = 0;
1423            if i == 0 {
1424                debug_trace!("\n=== State 0 Analysis ===");
1425                debug_trace!("State 0 has {} items:", current_set.items.len());
1426            }
1427            for (_idx, item) in current_set.items.iter().enumerate() {
1428                if i == 0 {
1429                    // Print the item details
1430                    if let Some(rule) = grammar
1431                        .all_rules()
1432                        .find(|r| r.production_id.0 == item.rule_id.0)
1433                    {
1434                        let mut item_str = String::new();
1435                        item_str.push_str(&format!("NT({}) -> ", rule.lhs.0));
1436                        for (pos, sym) in rule.rhs.iter().enumerate() {
1437                            if pos == item.position {
1438                                item_str.push_str("• ");
1439                            }
1440                            match sym {
1441                                Symbol::Terminal(t) => item_str.push_str(&format!("T({}) ", t.0)),
1442                                Symbol::NonTerminal(nt) => {
1443                                    item_str.push_str(&format!("NT({}) ", nt.0))
1444                                }
1445                                Symbol::External(e) => item_str.push_str(&format!("EXT({}) ", e.0)),
1446                                _ => item_str.push_str(&format!("{:?} ", sym)),
1447                            }
1448                        }
1449                        if item.position == rule.rhs.len() {
1450                            item_str.push_str("• ");
1451                        }
1452                        debug_trace!("  Item {}: {} (rule_id={})", _idx, item_str, item.rule_id.0);
1453                    }
1454                }
1455
1456                if let Some(symbol) = item.next_symbol(grammar) {
1457                    match symbol {
1458                        Symbol::Terminal(_id) => {
1459                            _terminal_count += 1;
1460                        }
1461                        Symbol::NonTerminal(_id) => {
1462                            _non_terminal_count += 1;
1463                        }
1464                        Symbol::External(_id) => {
1465                            _terminal_count += 1; // Count externals as terminals
1466                        }
1467                        _ => {}
1468                    }
1469                    symbols.insert(symbol.clone());
1470                    if i == 0 {
1471                        debug_trace!("    -> next symbol: {:?}", symbol);
1472                    }
1473                }
1474            }
1475
1476            if i == 0 {
1477                debug_trace!("\nState 0 summary:");
1478                debug_trace!("  Total symbols that can be shifted: {}", symbols.len());
1479                debug_trace!("  Terminals: {}", _terminal_count);
1480                debug_trace!("  Non-terminals: {}", _non_terminal_count);
1481                debug_trace!("  Symbols: {:?}\n", symbols);
1482            }
1483
1484            // Debug: symbols.len(), _terminal_count, _non_terminal_count
1485            for item in &current_set.items {
1486                if let Some(symbol) = item.next_symbol(grammar) {
1487                    let _symbol_id = match &symbol {
1488                        Symbol::Terminal(id) | Symbol::NonTerminal(id) | Symbol::External(id) => id,
1489                        _ => panic!("Complex symbol"),
1490                    };
1491                    // "  Item rule_id={}, position={}, next_symbol={:?} (id={})"
1492                }
1493            }
1494
1495            for symbol in &symbols {
1496                let _symbol_id = match symbol {
1497                    Symbol::Terminal(id) | Symbol::NonTerminal(id) | Symbol::External(id) => id,
1498                    _ => panic!("Complex symbol"),
1499                };
1500            }
1501
1502            // Compute GOTO for each symbol
1503            for symbol in symbols {
1504                let goto_set = current_set.goto(&symbol, grammar, first_follow);
1505
1506                if !goto_set.items.is_empty() {
1507                    // Check if this set already exists
1508                    let existing_state = collection
1509                        .sets
1510                        .iter()
1511                        .find(|set| set.items == goto_set.items)
1512                        .map(|set| set.id);
1513
1514                    let target_state = if let Some(existing_id) = existing_state {
1515                        existing_id
1516                    } else {
1517                        // Add new state
1518                        let new_id = StateId(state_counter);
1519                        let mut new_set = goto_set;
1520                        new_set.id = new_id;
1521                        collection.sets.push(new_set);
1522                        state_counter += 1;
1523                        new_id
1524                    };
1525
1526                    // Add to GOTO table
1527                    let symbol_id = match symbol {
1528                        Symbol::Terminal(id) | Symbol::NonTerminal(id) | Symbol::External(id) => id,
1529                        Symbol::Optional(_)
1530                        | Symbol::Repeat(_)
1531                        | Symbol::RepeatOne(_)
1532                        | Symbol::Choice(_)
1533                        | Symbol::Sequence(_)
1534                        | Symbol::Epsilon => {
1535                            panic!(
1536                                "Complex symbols should be normalized before LR item generation"
1537                            );
1538                        }
1539                    };
1540                    if current_set.id.0 == 0 {
1541                        debug_trace!(
1542                            "  State 0 GOTO: symbol {:?} -> state {}",
1543                            symbol_id,
1544                            target_state.0
1545                        );
1546                    }
1547                    collection
1548                        .goto_table
1549                        .insert((current_set.id, symbol_id), target_state);
1550
1551                    // Track whether this symbol is a terminal or non-terminal
1552                    let is_terminal = matches!(symbol, Symbol::Terminal(_) | Symbol::External(_));
1553                    collection.symbol_is_terminal.insert(symbol_id, is_terminal);
1554                    // "DEBUG: Added goto({}, {}) = {}"
1555                }
1556            }
1557
1558            i += 1;
1559        }
1560
1561        collection
1562    }
1563}
1564
1565/// Lexer mode for a parser state
1566#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
1567#[cfg_attr(feature = "serialization", derive(Serialize, Deserialize))]
1568pub struct LexMode {
1569    /// Internal lexer DFA state
1570    pub lex_state: u16,
1571    /// State for the external scanner (if any)
1572    pub external_lex_state: u16,
1573}
1574
1575/// How GOTO table columns are indexed
1576#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1577#[cfg_attr(feature = "serialization", derive(Serialize, Deserialize))]
1578pub enum GotoIndexing {
1579    /// Use nonterminal_to_index mapping (standard)
1580    NonterminalMap,
1581    /// Use SymbolId.0 directly as column index (some table generators)
1582    DirectSymbolId,
1583}
1584
1585/// GLR-compatible parse table supporting multiple actions per state
1586#[derive(Debug, Clone)]
1587#[cfg_attr(feature = "serialization", derive(Serialize, Deserialize))]
1588#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
1589pub struct ParseTable {
1590    /// ACTION table: indexed by `[state][terminal]` using symbol_to_index
1591    pub action_table: Vec<Vec<ActionCell>>,
1592    /// GOTO table: indexed by `[state][nonterminal]` using nonterminal_to_index or direct ID
1593    pub goto_table: Vec<Vec<StateId>>,
1594    /// Metadata (name, visibility, etc.) for each symbol in the grammar.
1595    pub symbol_metadata: Vec<SymbolMetadata>,
1596    /// Total number of parser states.
1597    pub state_count: usize,
1598    /// Total number of symbols (terminals + non-terminals).
1599    pub symbol_count: usize,
1600    /// Maps terminal symbols to ACTION table column indices
1601    pub symbol_to_index: BTreeMap<SymbolId, usize>,
1602    /// Index -> SymbolId, perfectly mirroring `symbol_to_index`.
1603    pub index_to_symbol: Vec<SymbolId>,
1604    /// For each state, a bitset indicating which external tokens are valid
1605    pub external_scanner_states: Vec<Vec<bool>>,
1606    /// Grammar rules for reduction
1607    pub rules: Vec<ParseRule>,
1608    /// Maps nonterminal symbols to GOTO table column indices
1609    pub nonterminal_to_index: BTreeMap<SymbolId, usize>,
1610    /// How GOTO table columns are indexed
1611    pub goto_indexing: GotoIndexing,
1612    /// EOF symbol ID
1613    pub eof_symbol: SymbolId,
1614    /// Start symbol ID
1615    pub start_symbol: SymbolId,
1616    /// Grammar metadata
1617    pub grammar: Grammar,
1618    /// Initial parser state (default: 0, Tree-sitter uses 1)
1619    pub initial_state: StateId,
1620    /// Number of tokens (regular terminals)
1621    pub token_count: usize,
1622    /// Number of external tokens (from external scanner)
1623    pub external_token_count: usize,
1624    /// Lex modes for each state (length == state_count)
1625    pub lex_modes: Vec<LexMode>,
1626    /// Terminal symbols to skip as whitespace/comments
1627    pub extras: Vec<SymbolId>,
1628    /// Dynamic precedence for each rule (optional)
1629    pub dynamic_prec_by_rule: Vec<i16>,
1630    /// Associativity for each rule: -1=Right, 0=None, +1=Left
1631    pub rule_assoc_by_rule: Vec<i8>,
1632    /// Alias sequences for rules
1633    pub alias_sequences: Vec<Vec<Option<SymbolId>>>,
1634    /// Field names
1635    pub field_names: Vec<String>,
1636    /// Map (rule, child_index) -> field_id
1637    pub field_map: BTreeMap<(RuleId, u16), u16>,
1638}
1639
1640/// Parse rule for reduction
1641#[derive(Debug, Clone)]
1642#[cfg_attr(feature = "serialization", derive(Serialize, Deserialize))]
1643#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
1644pub struct ParseRule {
1645    /// Left-hand side non-terminal symbol of the rule.
1646    pub lhs: SymbolId,
1647    /// Number of symbols on the right-hand side.
1648    pub rhs_len: u16,
1649}
1650
1651impl Default for ParseTable {
1652    fn default() -> Self {
1653        Self {
1654            action_table: vec![],
1655            goto_table: vec![],
1656            symbol_metadata: vec![],
1657            state_count: 0,
1658            symbol_count: 0,
1659            symbol_to_index: BTreeMap::new(),
1660            index_to_symbol: vec![],
1661            external_scanner_states: vec![],
1662            rules: vec![],
1663            nonterminal_to_index: BTreeMap::new(),
1664            goto_indexing: GotoIndexing::NonterminalMap,
1665            eof_symbol: SymbolId(0),
1666            start_symbol: SymbolId(0),
1667            grammar: Grammar::new("default".to_string()),
1668            initial_state: StateId(0),
1669            token_count: 0,
1670            external_token_count: 0,
1671            lex_modes: vec![],
1672            extras: vec![],
1673            dynamic_prec_by_rule: vec![],
1674            rule_assoc_by_rule: vec![],
1675            alias_sequences: vec![],
1676            field_names: vec![],
1677            field_map: BTreeMap::new(),
1678        }
1679    }
1680}
1681
1682impl ParseTable {
1683    /// Builder method to auto-detect GOTO indexing
1684    pub fn with_detected_goto_indexing(mut self) -> Self {
1685        self.detect_goto_indexing();
1686        self
1687    }
1688
1689    /// Normalize EOF symbol to SymbolId(0) for consistency
1690    /// This ensures compatibility with various table producers
1691    pub fn normalize_eof_to_zero(mut self) -> Self {
1692        // If EOF is already 0, nothing to do
1693        if self.eof_symbol == SymbolId(0) {
1694            return self;
1695        }
1696
1697        let old_eof = self.eof_symbol;
1698        // Log the normalization for debugging
1699        #[cfg(debug_assertions)]
1700        debug_trace!("Normalizing EOF from {:?} to SymbolId(0)", old_eof);
1701
1702        // Get the indices for remapping
1703        let old_idx = self.symbol_to_index.get(&old_eof).copied();
1704        let zero_idx = self.symbol_to_index.get(&SymbolId(0)).copied();
1705
1706        // Swap columns in ACTION table if both indices exist
1707        if let (Some(old_idx), Some(zero_idx)) = (old_idx, zero_idx) {
1708            for row in &mut self.action_table {
1709                if old_idx < row.len() && zero_idx < row.len() {
1710                    row.swap(old_idx, zero_idx);
1711                }
1712            }
1713            // Now: 0 → old_idx, old_eof → (removed)
1714            self.symbol_to_index.insert(SymbolId(0), old_idx);
1715            self.symbol_to_index.remove(&old_eof);
1716
1717            // Update index_to_symbol if it exists
1718            if old_idx < self.index_to_symbol.len() {
1719                self.index_to_symbol[old_idx] = SymbolId(0);
1720            }
1721        } else if let Some(old_idx) = old_idx {
1722            // Only old EOF existed: move its column mapping to 0
1723            self.symbol_to_index.remove(&old_eof);
1724            self.symbol_to_index.insert(SymbolId(0), old_idx);
1725
1726            // Update index_to_symbol if it exists
1727            if old_idx < self.index_to_symbol.len() {
1728                self.index_to_symbol[old_idx] = SymbolId(0);
1729            }
1730        } else {
1731            // Neither mapped: ensure EOF->0 exists so consumers don't panic
1732            self.symbol_to_index.insert(SymbolId(0), 0);
1733        }
1734
1735        // Update EOF symbol
1736        self.eof_symbol = SymbolId(0);
1737        self
1738    }
1739
1740    /// Auto-detect the GOTO indexing mode based on table contents
1741    pub fn detect_goto_indexing(&mut self) {
1742        // Try to determine if the start symbol has a valid GOTO from state 0
1743        let start_nt = self.start_symbol;
1744
1745        // Check if start symbol has entry via nonterminal_to_index
1746        let col_map = self
1747            .nonterminal_to_index
1748            .get(&start_nt)
1749            .and_then(|&c| self.goto_table.first().and_then(|row| row.get(c)))
1750            .copied();
1751
1752        // Check if start symbol has entry via direct symbol ID
1753        let col_direct = self
1754            .goto_table
1755            .first()
1756            .and_then(|row| row.get(start_nt.0 as usize))
1757            .copied();
1758
1759        self.goto_indexing = match (col_map, col_direct) {
1760            (Some(s), _) if s.0 != 0 => GotoIndexing::NonterminalMap,
1761            (_, Some(s)) if s.0 != 0 => GotoIndexing::DirectSymbolId,
1762            // Default to nonterminal map; unit tests will catch a mismatch
1763            _ => GotoIndexing::NonterminalMap,
1764        };
1765    }
1766
1767    /// Get the terminal boundary (tokens + external tokens)
1768    #[inline]
1769    pub fn terminal_boundary(&self) -> usize {
1770        self.token_count + self.external_token_count
1771    }
1772
1773    /// Check if a symbol is a terminal
1774    #[inline]
1775    pub fn is_terminal(&self, sym: SymbolId) -> bool {
1776        (sym.0 as usize) < self.terminal_boundary()
1777    }
1778
1779    /// Get valid symbols mask for a state (terminals that have actions)
1780    pub fn valid_symbols(&self, state: StateId) -> Vec<bool> {
1781        let n = self.terminal_boundary();
1782        let mut v = vec![false; n];
1783        let s = state.0 as usize;
1784        if s < self.action_table.len() {
1785            for t in 0..n.min(self.action_table[s].len()) {
1786                v[t] = !self.action_table[s][t].is_empty();
1787            }
1788        }
1789        v
1790    }
1791
1792    /// Get actions for a state and symbol.
1793    ///
1794    /// Returns the slice of [`Action`]s for the given `(state, terminal)` pair.
1795    /// Returns an empty slice when no actions exist.
1796    ///
1797    /// # Examples
1798    ///
1799    /// ```
1800    /// use adze_glr_core::{FirstFollowSets, build_lr1_automaton, Action};
1801    /// use adze_ir::*;
1802    ///
1803    /// let mut grammar = Grammar::new("act".into());
1804    /// let a = SymbolId(1);
1805    /// let s = SymbolId(10);
1806    /// grammar.tokens.insert(a, Token { name: "a".into(), pattern: TokenPattern::String("a".into()), fragile: false });
1807    /// grammar.rule_names.insert(s, "S".into());
1808    /// grammar.rules.insert(s, vec![
1809    ///     Rule { lhs: s, rhs: vec![Symbol::Terminal(a)], precedence: None, associativity: None, fields: vec![], production_id: ProductionId(0) },
1810    /// ]);
1811    ///
1812    /// let ff = FirstFollowSets::compute(&grammar).unwrap();
1813    /// let table = build_lr1_automaton(&grammar, &ff).unwrap();
1814    ///
1815    /// // Initial state should have a Shift on terminal 'a'
1816    /// let actions = table.actions(table.initial_state, a);
1817    /// assert!(actions.iter().any(|a| matches!(a, Action::Shift(_))));
1818    /// ```
1819    #[inline]
1820    pub fn actions(&self, state: StateId, sym: SymbolId) -> &'_ [Action] {
1821        let s = state.0 as usize;
1822        let Some(&col) = self.symbol_to_index.get(&sym) else {
1823            return &[];
1824        };
1825        if s >= self.action_table.len() || col >= self.action_table[s].len() {
1826            return &[];
1827        }
1828        &self.action_table[s][col]
1829    }
1830
1831    /// Get goto state for a nonterminal.
1832    ///
1833    /// Returns the target state after reducing to `nt` in the given `state`,
1834    /// or `None` if no transition exists.
1835    ///
1836    /// # Examples
1837    ///
1838    /// ```
1839    /// use adze_glr_core::{FirstFollowSets, build_lr1_automaton};
1840    /// use adze_ir::*;
1841    ///
1842    /// let mut grammar = Grammar::new("goto".into());
1843    /// let a = SymbolId(1);
1844    /// let s = SymbolId(10);
1845    /// grammar.tokens.insert(a, Token { name: "a".into(), pattern: TokenPattern::String("a".into()), fragile: false });
1846    /// grammar.rule_names.insert(s, "S".into());
1847    /// grammar.rules.insert(s, vec![
1848    ///     Rule { lhs: s, rhs: vec![Symbol::Terminal(a)], precedence: None, associativity: None, fields: vec![], production_id: ProductionId(0) },
1849    /// ]);
1850    ///
1851    /// let ff = FirstFollowSets::compute(&grammar).unwrap();
1852    /// let table = build_lr1_automaton(&grammar, &ff).unwrap();
1853    ///
1854    /// // After shifting 'a' and reducing S→a, goto(0, S) should exist
1855    /// let target = table.goto(table.initial_state, s);
1856    /// assert!(target.is_some(), "goto(initial, S) should exist");
1857    /// ```
1858    #[inline]
1859    pub fn goto(&self, state: StateId, nt: SymbolId) -> Option<StateId> {
1860        let s = state.0 as usize;
1861        let &col = self.nonterminal_to_index.get(&nt)?;
1862        // Allow "no edge" to be represented as a sentinel (e.g., u16::MAX)
1863        let ns = *self.goto_table.get(s)?.get(col)?;
1864        (ns.0 != u16::MAX).then_some(ns)
1865    }
1866
1867    /// Get rule information by ID
1868    #[inline]
1869    pub fn rule(&self, id: RuleId) -> (SymbolId, u16) {
1870        let r = &self.rules[id.0 as usize];
1871        (r.lhs, r.rhs_len)
1872    }
1873
1874    /// Get EOF symbol
1875    #[inline]
1876    pub fn eof(&self) -> SymbolId {
1877        self.eof_symbol
1878    }
1879
1880    /// Get start symbol
1881    #[inline]
1882    pub fn start_symbol(&self) -> SymbolId {
1883        self.start_symbol
1884    }
1885
1886    /// Get grammar reference
1887    #[inline]
1888    pub fn grammar(&self) -> &Grammar {
1889        &self.grammar
1890    }
1891
1892    /// Get the ERROR symbol (by convention, symbol 0 or -1 in Tree-sitter)
1893    #[inline]
1894    pub fn error_symbol(&self) -> SymbolId {
1895        // Tree-sitter convention: ERROR is typically symbol 0
1896        // We could also check for a symbol named "ERROR" in the grammar
1897        SymbolId(0)
1898    }
1899
1900    /// Get valid symbols mask for a state (terminals that have actions)
1901    #[inline]
1902    pub fn valid_symbols_mask(&self, state: StateId) -> Vec<bool> {
1903        let n = self.terminal_boundary();
1904        let mut v = vec![false; n];
1905        let s = state.0 as usize;
1906        if s < self.action_table.len() {
1907            for t in 0..n.min(self.action_table[s].len()) {
1908                v[t] = !self.action_table[s][t].is_empty();
1909            }
1910        }
1911        v
1912    }
1913
1914    /// Get lex mode for a state
1915    #[inline]
1916    pub fn lex_mode(&self, state: StateId) -> LexMode {
1917        let idx = state.0 as usize;
1918        if idx < self.lex_modes.len() {
1919            self.lex_modes[idx]
1920        } else {
1921            LexMode {
1922                lex_state: 0,
1923                external_lex_state: 0,
1924            }
1925        }
1926    }
1927
1928    /// Check if a symbol is an extra (whitespace/comment)
1929    #[inline]
1930    pub fn is_extra(&self, sym: SymbolId) -> bool {
1931        self.extras.contains(&sym)
1932    }
1933
1934    /// Validate parse table invariants
1935    ///
1936    /// This method checks critical invariants that must hold for correct parsing:
1937    /// - EOF symbol is not internal ERROR sentinel
1938    /// - EOF symbol is a proper sentinel (>= token_count + external_token_count)
1939    /// - EOF symbol is present in symbol_to_index mapping
1940    /// - EOF and END columns have matching action patterns (parity)
1941    #[must_use = "validation result must be checked"]
1942    pub fn validate(&self) -> Result<(), TableError> {
1943        let terminal_boundary = self.token_count + self.external_token_count;
1944
1945        debug_assert_ne!(
1946            self.eof_symbol,
1947            parse_forest::ERROR_SYMBOL,
1948            "EOF symbol cannot be the ERROR sentinel"
1949        );
1950
1951        if self.eof_symbol == parse_forest::ERROR_SYMBOL {
1952            return Err(TableError::EofIsError);
1953        }
1954
1955        // Check EOF is a terminal sentinel beyond all non-EOF symbols.
1956        if (self.eof_symbol.0 as usize) < terminal_boundary {
1957            return Err(TableError::EofNotSentinel {
1958                eof: self.eof_symbol.0,
1959                token_count: self.token_count as u32,
1960                external_count: self.external_token_count as u32,
1961            });
1962        }
1963
1964        // Check EOF is in symbol_to_index
1965        if !self.symbol_to_index.contains_key(&self.eof_symbol) {
1966            return Err(TableError::EofMissingFromIndex);
1967        }
1968
1969        // Validate terminal partitions
1970        let tb = self.terminal_boundary();
1971
1972        // All extras must be regular terminals
1973        debug_assert!(
1974            self.extras
1975                .iter()
1976                .all(|&sym| (sym.0 as usize) < self.token_count),
1977            "all extras must be within [0..token_count)"
1978        );
1979
1980        // Regular terminals must not be external
1981        for sym_id in 0..self.token_count {
1982            let sym = SymbolId(sym_id as u16);
1983            debug_assert!(self.is_terminal(sym), "0..token_count are terminals");
1984            // Regular terminals are not external - we verify this by the band
1985            debug_assert!(
1986                (sym.0 as usize) < self.token_count,
1987                "regular terminals are in [0..token_count)"
1988            );
1989        }
1990
1991        // External tokens must be in their band
1992        for sym_id in self.token_count..tb {
1993            let sym = SymbolId(sym_id as u16);
1994            debug_assert!(self.is_terminal(sym), "external tokens are terminals");
1995            // External tokens are in the external band by definition
1996            debug_assert!(
1997                (sym.0 as usize) >= self.token_count && (sym.0 as usize) < tb,
1998                "external tokens are in [token_count..terminal_boundary)"
1999            );
2000        }
2001
2002        debug_assert!(
2003            self.symbol_to_index.contains_key(&self.eof_symbol),
2004            "EOF must exist in ACTION map"
2005        );
2006
2007        // Check EOF/END parity if we have END symbol in Tree-sitter (last terminal)
2008        // The END symbol in Tree-sitter is typically the last terminal before EOF
2009        if terminal_boundary > 0 {
2010            let end_symbol = SymbolId((terminal_boundary - 1) as u16);
2011            if let (Some(&eof_col), Some(&end_col)) = (
2012                self.symbol_to_index.get(&self.eof_symbol),
2013                self.symbol_to_index.get(&end_symbol),
2014            ) {
2015                // Check parity for each state
2016                for (state_idx, row) in self.action_table.iter().enumerate() {
2017                    if eof_col < row.len() && end_col < row.len() {
2018                        let eof_actions = &row[eof_col];
2019                        let end_actions = &row[end_col];
2020
2021                        // They should have the same action pattern (both empty or both non-empty)
2022                        // and if non-empty, should have compatible actions
2023                        if eof_actions.is_empty() != end_actions.is_empty() {
2024                            return Err(TableError::EofParityMismatch(state_idx as u16));
2025                        }
2026                    }
2027                }
2028            }
2029        }
2030
2031        Ok(())
2032    }
2033
2034    /// Remap GOTO table from NonterminalMap layout to DirectSymbolId layout.
2035    /// No-op if already DirectSymbolId.
2036    pub fn remap_goto_to_direct_symbol_id(mut self) -> Self {
2037        if matches!(self.goto_indexing, GotoIndexing::DirectSymbolId) {
2038            return self;
2039        }
2040        // Establish the max symbol id we need to size rows
2041        let max_sym = self
2042            .nonterminal_to_index
2043            .keys()
2044            .map(|s| s.0 as usize)
2045            .max()
2046            .unwrap_or(0);
2047        let new_width = max_sym + 1;
2048
2049        for row in &mut self.goto_table {
2050            // Defensive check: ensure all column indices are valid
2051            debug_assert!(
2052                self.nonterminal_to_index.values().all(|&c| c < row.len()),
2053                "nonterminal_to_index contains a column >= row width"
2054            );
2055
2056            let mut new_row = vec![StateId(0); new_width];
2057            // Move each mapped nonterminal from its old column into the col = symbol id
2058            for (sym, &old_col) in &self.nonterminal_to_index {
2059                if old_col < row.len() {
2060                    new_row[sym.0 as usize] = row[old_col];
2061                }
2062            }
2063            *row = new_row;
2064        }
2065        self.goto_indexing = GotoIndexing::DirectSymbolId;
2066        self
2067    }
2068
2069    /// Remap GOTO table from DirectSymbolId layout to NonterminalMap layout.
2070    /// No-op if already NonterminalMap.
2071    pub fn remap_goto_to_nonterminal_map(mut self) -> Self {
2072        if matches!(self.goto_indexing, GotoIndexing::NonterminalMap) {
2073            return self;
2074        }
2075        // Compute width for the map layout
2076        let width = self
2077            .nonterminal_to_index
2078            .values()
2079            .copied()
2080            .max()
2081            .unwrap_or(0)
2082            + 1;
2083        for row in &mut self.goto_table {
2084            // Defensive check: ensure source indices are valid
2085            debug_assert!(
2086                self.nonterminal_to_index
2087                    .keys()
2088                    .all(|s| (s.0 as usize) < row.len()),
2089                "nonterminal_to_index contains a symbol id >= row width"
2090            );
2091
2092            let mut new_row = vec![StateId(0); width];
2093            for (sym, &col) in &self.nonterminal_to_index {
2094                let src = sym.0 as usize;
2095                if src < row.len() && col < new_row.len() {
2096                    new_row[col] = row[src];
2097                }
2098            }
2099            *row = new_row;
2100        }
2101        self.goto_indexing = GotoIndexing::NonterminalMap;
2102        self
2103    }
2104}
2105
2106/// Actions in GLR parse table (supporting multiple actions per state)
2107#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
2108#[non_exhaustive]
2109#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
2110pub enum Action {
2111    /// Shift the current token and transition to the given state.
2112    Shift(StateId),
2113    /// Reduce by the given grammar rule.
2114    Reduce(RuleId),
2115    /// Accept the input (parsing complete).
2116    Accept,
2117    /// No valid action (syntax error).
2118    Error,
2119    /// Tree-sitter error recovery — insert missing node.
2120    Recover,
2121    /// GLR fork point — multiple valid actions to explore.
2122    Fork(Vec<Action>),
2123}
2124
2125/// Action cell that can hold multiple actions for GLR
2126pub type ActionCell = Vec<Action>;
2127
2128/// Symbol metadata for the parse table
2129#[derive(Debug, Clone)]
2130#[cfg_attr(feature = "serialization", derive(Serialize, Deserialize))]
2131#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
2132pub struct SymbolMetadata {
2133    /// Human-readable symbol name.
2134    pub name: String,
2135    /// Whether the symbol is visible in the syntax tree.
2136    pub is_visible: bool,
2137    /// Whether the symbol is a named node (vs anonymous).
2138    pub is_named: bool,
2139    /// Whether the symbol is a supertype node.
2140    pub is_supertype: bool,
2141    // Additional fields required by API contracts
2142    /// Whether the symbol is a terminal (leaf token).
2143    pub is_terminal: bool,
2144    /// Whether the symbol is an extra (e.g., whitespace, comments).
2145    pub is_extra: bool,
2146    /// Whether the symbol is fragile (invalidated by edits).
2147    pub is_fragile: bool,
2148    /// Unique identifier for this symbol.
2149    pub symbol_id: SymbolId,
2150}
2151
2152/// Conflict detection and resolution
2153#[derive(Debug, Clone)]
2154#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
2155pub struct ConflictResolver {
2156    /// All detected parse table conflicts.
2157    pub conflicts: Vec<Conflict>,
2158}
2159
2160/// Conflict information for GLR parsing
2161#[derive(Debug, Clone)]
2162#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
2163pub struct Conflict {
2164    /// Parser state where the conflict occurs.
2165    pub state: StateId,
2166    /// Lookahead symbol that triggers the conflict.
2167    pub symbol: SymbolId,
2168    /// Conflicting actions for this state/symbol pair.
2169    pub actions: Vec<Action>,
2170    /// Classification of the conflict.
2171    pub conflict_type: ConflictType,
2172}
2173
2174/// Type of parser conflict
2175#[derive(Debug, Clone, PartialEq, Eq)]
2176#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
2177pub enum ConflictType {
2178    /// Conflict between a shift action and a reduce action.
2179    ShiftReduce,
2180    /// Conflict between two different reduce actions.
2181    ReduceReduce,
2182}
2183
2184impl ConflictResolver {
2185    /// Detect conflicts in the parse table.
2186    ///
2187    /// Scans every item set and reports shift/reduce or reduce/reduce conflicts.
2188    ///
2189    /// # Examples
2190    ///
2191    /// ```
2192    /// use adze_glr_core::{ConflictResolver, ConflictType, FirstFollowSets, ItemSetCollection};
2193    /// use adze_ir::*;
2194    ///
2195    /// // E → a | E E  (inherently ambiguous)
2196    /// let mut grammar = Grammar::new("ambig".into());
2197    /// let a = SymbolId(1);
2198    /// let e = SymbolId(10);
2199    /// grammar.tokens.insert(a, Token { name: "a".into(), pattern: TokenPattern::String("a".into()), fragile: false });
2200    /// grammar.rule_names.insert(e, "E".into());
2201    /// grammar.rules.insert(e, vec![
2202    ///     Rule { lhs: e, rhs: vec![Symbol::Terminal(a)], precedence: None, associativity: None, fields: vec![], production_id: ProductionId(0) },
2203    ///     Rule { lhs: e, rhs: vec![Symbol::NonTerminal(e), Symbol::NonTerminal(e)], precedence: None, associativity: None, fields: vec![], production_id: ProductionId(1) },
2204    /// ]);
2205    ///
2206    /// let ff = FirstFollowSets::compute(&grammar).unwrap();
2207    /// let collection = ItemSetCollection::build_canonical_collection(&grammar, &ff);
2208    /// let resolver = ConflictResolver::detect_conflicts(&collection, &grammar, &ff);
2209    /// // An ambiguous grammar like E → a | E E should have conflicts
2210    /// assert!(!resolver.conflicts.is_empty(), "should detect conflicts");
2211    /// ```
2212    pub fn detect_conflicts(
2213        item_sets: &ItemSetCollection,
2214        grammar: &Grammar,
2215        _first_follow: &FirstFollowSets,
2216    ) -> Self {
2217        let mut conflicts = Vec::new();
2218
2219        for item_set in &item_sets.sets {
2220            let mut actions_by_symbol: IndexMap<SymbolId, Vec<Action>> = IndexMap::new();
2221
2222            // Collect all possible actions for each symbol in this state
2223            for item in &item_set.items {
2224                if item.is_reduce_item(grammar) {
2225                    // Check if this is a reduction to the start symbol with EOF lookahead
2226                    let mut is_accept = false;
2227
2228                    // Find the rule that corresponds to this rule ID
2229                    if let Some(start_symbol) = grammar.start_symbol() {
2230                        // Look through all rules to find the one with this rule ID
2231                        for rule in grammar.all_rules() {
2232                            if rule.production_id.0 == item.rule_id.0 {
2233                                // Check if this rule reduces to the start symbol and we have EOF lookahead
2234                                is_accept =
2235                                    rule.lhs == start_symbol && item.lookahead == SymbolId(0);
2236                                break;
2237                            }
2238                        }
2239                    }
2240
2241                    let action = if is_accept {
2242                        Action::Accept
2243                    } else {
2244                        Action::Reduce(item.rule_id)
2245                    };
2246
2247                    actions_by_symbol
2248                        .entry(item.lookahead)
2249                        .or_default()
2250                        .push(action);
2251                } else if let Some(symbol) = item.next_symbol(grammar) {
2252                    // Shift action
2253                    let symbol_id = match symbol {
2254                        Symbol::Terminal(id) | Symbol::NonTerminal(id) | Symbol::External(id) => {
2255                            *id
2256                        }
2257                        Symbol::Optional(_)
2258                        | Symbol::Repeat(_)
2259                        | Symbol::RepeatOne(_)
2260                        | Symbol::Choice(_)
2261                        | Symbol::Sequence(_)
2262                        | Symbol::Epsilon => {
2263                            panic!(
2264                                "Complex symbols should be normalized before LR item generation"
2265                            );
2266                        }
2267                    };
2268
2269                    if let Some(target_state) = item_sets.goto_table.get(&(item_set.id, symbol_id))
2270                    {
2271                        let action = Action::Shift(*target_state);
2272                        actions_by_symbol.entry(symbol_id).or_default().push(action);
2273                    }
2274                }
2275            }
2276
2277            // Check for conflicts
2278            for (symbol_id, actions) in actions_by_symbol {
2279                if actions.len() > 1 {
2280                    let conflict_type = if actions.iter().any(|a| matches!(a, Action::Shift(_)))
2281                        && actions.iter().any(|a| matches!(a, Action::Reduce(_)))
2282                    {
2283                        ConflictType::ShiftReduce
2284                    } else {
2285                        ConflictType::ReduceReduce
2286                    };
2287
2288                    conflicts.push(Conflict {
2289                        state: item_set.id,
2290                        symbol: symbol_id,
2291                        actions,
2292                        conflict_type,
2293                    });
2294                }
2295            }
2296        }
2297
2298        Self { conflicts }
2299    }
2300
2301    /// Resolve conflicts using precedence and associativity rules
2302    pub fn resolve_conflicts(&mut self, grammar: &Grammar) {
2303        // Clone conflicts to avoid borrowing issues
2304        let mut conflicts_to_resolve = self.conflicts.clone();
2305        for conflict in &mut conflicts_to_resolve {
2306            // Apply Tree-sitter's exact conflict resolution logic
2307            self.resolve_single_conflict(conflict, grammar);
2308        }
2309        self.conflicts = conflicts_to_resolve;
2310    }
2311
2312    fn resolve_single_conflict(&self, conflict: &mut Conflict, grammar: &Grammar) {
2313        // Implement Tree-sitter's exact precedence and associativity resolution
2314        // This is where we port the C logic for conflict resolution
2315
2316        match conflict.conflict_type {
2317            ConflictType::ShiftReduce => {
2318                // Apply precedence rules between shift and reduce
2319                // Higher precedence wins, same precedence uses associativity
2320                self.resolve_shift_reduce_conflict(conflict, grammar);
2321            }
2322            ConflictType::ReduceReduce => {
2323                // Apply precedence rules between multiple reduces
2324                // Usually choose the rule that appears first in the grammar
2325                self.resolve_reduce_reduce_conflict(conflict, grammar);
2326            }
2327        }
2328    }
2329
2330    fn resolve_shift_reduce_conflict(&self, conflict: &mut Conflict, grammar: &Grammar) {
2331        // Use Tree-sitter's exact precedence comparison logic
2332        let precedence_resolver = StaticPrecedenceResolver::from_grammar(grammar);
2333
2334        let mut shift_action = None;
2335        let mut reduce_action = None;
2336
2337        // Find shift and reduce actions
2338        for action in &conflict.actions {
2339            match action {
2340                Action::Shift(_) => shift_action = Some(action.clone()),
2341                Action::Reduce(_) => reduce_action = Some(action.clone()),
2342                _ => {}
2343            }
2344        }
2345
2346        match (shift_action, reduce_action) {
2347            (Some(shift), Some(reduce)) => {
2348                // Get precedence info for shift token
2349                let shift_prec = precedence_resolver.token_precedence(conflict.symbol);
2350
2351                // Get precedence info for reduce rule
2352                let reduce_prec = if let Action::Reduce(rule_id) = &reduce {
2353                    precedence_resolver.rule_precedence(*rule_id)
2354                } else {
2355                    None
2356                };
2357
2358                // Compare precedences
2359                // PRECEDENCE RESOLUTION: When precedence can definitively resolve the conflict,
2360                // we eliminate the lower-precedence action (not just re-order).
2361                // This ensures correct parsing for unambiguous grammars.
2362                match compare_precedences(shift_prec, reduce_prec) {
2363                    PrecedenceComparison::PreferShift => {
2364                        // Shift wins - eliminate reduce action
2365                        conflict.actions = vec![shift];
2366                    }
2367                    PrecedenceComparison::PreferReduce => {
2368                        // Reduce wins - eliminate shift action
2369                        conflict.actions = vec![reduce];
2370                    }
2371                    PrecedenceComparison::Error => {
2372                        // Non-associative conflict - this is an error
2373                        // Keep Fork to signal ambiguity for error reporting
2374                        conflict.actions = vec![Action::Fork(vec![shift, reduce])];
2375                    }
2376                    PrecedenceComparison::None => {
2377                        // No precedence info - use GLR fork to explore all paths
2378                        conflict.actions = vec![Action::Fork(vec![shift, reduce])];
2379                    }
2380                }
2381            }
2382            _ => {
2383                // Should not happen in a shift/reduce conflict
2384                // Keep original actions
2385            }
2386        }
2387    }
2388
2389    fn resolve_reduce_reduce_conflict(&self, conflict: &mut Conflict, _grammar: &Grammar) {
2390        // Choose the rule that appears first in the grammar
2391        // This is Tree-sitter's default behavior for reduce/reduce conflicts
2392
2393        let mut best_action = None;
2394        let mut best_rule_id = u16::MAX;
2395
2396        for action in &conflict.actions {
2397            if let Action::Reduce(rule_id) = action
2398                && rule_id.0 < best_rule_id
2399            {
2400                best_rule_id = rule_id.0;
2401                best_action = Some(action.clone());
2402            }
2403        }
2404
2405        if let Some(action) = best_action {
2406            conflict.actions = vec![action];
2407        }
2408    }
2409}
2410
2411/// Error types for GLR processing
2412#[derive(Debug, thiserror::Error)]
2413#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
2414pub enum GLRError {
2415    /// Error originating from grammar validation.
2416    #[error("Grammar error: {0}")]
2417    GrammarError(#[from] GrammarError),
2418
2419    /// Conflict resolution could not be completed.
2420    #[error("Conflict resolution failed: {0}")]
2421    ConflictResolution(String),
2422
2423    /// State machine construction failed.
2424    #[error("State machine generation failed: {0}")]
2425    StateMachine(String),
2426
2427    /// Parse table failed post-generation validation.
2428    #[error("Table validation failed: {0}")]
2429    TableValidation(TableError),
2430
2431    /// Grammar contains complex symbols that must be normalized first.
2432    #[error("Complex symbols must be normalized before {operation}")]
2433    ComplexSymbolsNotNormalized { operation: String },
2434
2435    /// A complex symbol was found where a simple one was expected.
2436    #[error("Expected {expected} symbol, found complex symbol")]
2437    ExpectedSimpleSymbol { expected: String },
2438
2439    /// A symbol is in an invalid state for the requested operation.
2440    #[error("Invalid symbol state during {operation}")]
2441    InvalidSymbolState { operation: String },
2442}
2443
2444/// Errors related to parse table validation
2445#[derive(Debug, thiserror::Error)]
2446#[cfg_attr(feature = "strict_docs", allow(missing_docs))]
2447pub enum TableError {
2448    /// The EOF symbol ID collides with the built-in ERROR symbol.
2449    #[error("EOF symbol collides with ERROR")]
2450    EofIsError,
2451
2452    /// EOF symbol ID is too low; it must be a sentinel beyond all tokens.
2453    #[error(
2454        "EOF symbol must be >= token_count + external_token_count (EOF: {eof}, tokens: {token_count}, externals: {external_count})"
2455    )]
2456    EofNotSentinel {
2457        eof: u16,
2458        token_count: u32,
2459        external_count: u32,
2460    },
2461
2462    /// The EOF symbol is not registered in the symbol-to-index mapping.
2463    #[error("EOF not present in symbol_to_index")]
2464    EofMissingFromIndex,
2465
2466    /// ACTION table EOF column has mismatched accept/reduce entries.
2467    #[error("EOF column parity mismatch at state {0}")]
2468    EofParityMismatch(u16),
2469}
2470
2471/// Check if a symbol can derive the start symbol through unit productions
2472#[allow(dead_code)]
2473fn can_derive_start(grammar: &Grammar, symbol: SymbolId, start: SymbolId) -> bool {
2474    if symbol == start {
2475        return true;
2476    }
2477
2478    // Check if there's a rule symbol -> start
2479    if let Some(rules) = grammar.get_rules_for_symbol(symbol) {
2480        for rule in rules {
2481            if rule.rhs.len() == 1
2482                && let Symbol::NonTerminal(target) = &rule.rhs[0]
2483                && *target == start
2484            {
2485                return true;
2486            }
2487        }
2488    }
2489
2490    false
2491}
2492
2493/// Normalize action cells for deterministic output
2494/// Sorts actions by type and value, and removes duplicates
2495fn normalize_action_table(action_table: &mut Vec<Vec<ActionCell>>) {
2496    for row in action_table.iter_mut() {
2497        for cell in row.iter_mut() {
2498            normalize_action_cell(cell);
2499        }
2500    }
2501}
2502
2503/// Normalize a single action cell (recursive for Fork actions)
2504fn normalize_action_cell(cell: &mut ActionCell) {
2505    for action in cell.iter_mut() {
2506        normalize_action(action);
2507    }
2508    cell.sort_by_key(action_sort_key);
2509    cell.dedup();
2510}
2511
2512/// Normalize a single action (recursively normalizes Fork contents)
2513fn normalize_action(action: &mut Action) {
2514    if let Action::Fork(inner) = action {
2515        for inner_action in inner.iter_mut() {
2516            normalize_action(inner_action);
2517        }
2518        inner.sort_by_key(action_sort_key);
2519        inner.dedup();
2520    }
2521}
2522
2523/// Generate a sort key for actions to ensure deterministic ordering
2524fn action_sort_key(action: &Action) -> (u8, u16, u16, u16) {
2525    match action {
2526        Action::Shift(s) => (0, s.0, 0, 0),
2527        Action::Reduce(r) => (1, r.0, 0, 0),
2528        Action::Accept => (2, 0, 0, 0),
2529        Action::Error => (3, 0, 0, 0),
2530        Action::Recover => (4, 0, 0, 0),
2531        Action::Fork(inner) => {
2532            // Tie-break forks by length (minor) and first key (major)
2533            let first = inner.first().map(action_sort_key).unwrap_or((0, 0, 0, 0));
2534            (5, first.1, first.2, inner.len() as u16)
2535        }
2536    }
2537}
2538
2539/// Build LR(1) automaton (parse table) from grammar.
2540///
2541/// Constructs an augmented grammar, builds the canonical LR(1) collection,
2542/// and fills the ACTION / GOTO tables.
2543///
2544/// # Examples
2545///
2546/// ```
2547/// use adze_glr_core::{FirstFollowSets, build_lr1_automaton, Action};
2548/// use adze_ir::*;
2549///
2550/// let mut grammar = Grammar::new("ab".into());
2551/// let a = SymbolId(1);
2552/// let s = SymbolId(10);
2553///
2554/// grammar.tokens.insert(a, Token { name: "a".into(), pattern: TokenPattern::String("a".into()), fragile: false });
2555/// grammar.rule_names.insert(s, "S".into());
2556/// grammar.rules.insert(s, vec![
2557///     Rule { lhs: s, rhs: vec![Symbol::Terminal(a)], precedence: None, associativity: None, fields: vec![], production_id: ProductionId(0) },
2558/// ]);
2559///
2560/// let ff = FirstFollowSets::compute(&grammar).unwrap();
2561/// let table = build_lr1_automaton(&grammar, &ff).unwrap();
2562///
2563/// assert!(table.state_count > 0);
2564/// assert_eq!(table.start_symbol(), s);
2565/// // The table should contain an Accept action somewhere on EOF
2566/// let eof = table.eof();
2567/// let has_accept = (0..table.state_count).any(|st| {
2568///     table.actions(StateId(st as u16), eof).iter().any(|a| matches!(a, Action::Accept))
2569/// });
2570/// assert!(has_accept, "table must have an Accept action");
2571/// ```
2572pub fn build_lr1_automaton(
2573    grammar: &Grammar,
2574    first_follow: &FirstFollowSets,
2575) -> Result<ParseTable, GLRError> {
2576    // Debug: Print some rules to see their structure
2577    let mut rule_count = 0;
2578    for rule in grammar.all_rules() {
2579        if rule_count >= 10 {
2580            break;
2581        }
2582        let mut rhs_str = String::new();
2583        for sym in &rule.rhs {
2584            match sym {
2585                Symbol::Terminal(id) => rhs_str.push_str(&format!("T({}) ", id.0)),
2586                Symbol::NonTerminal(id) => rhs_str.push_str(&format!("NT({}) ", id.0)),
2587                _ => rhs_str.push_str("? "),
2588            }
2589        }
2590        rule_count += 1;
2591    }
2592
2593    // Build stable symbol partitions for table columns:
2594    // EOF, internal terminals, external terminals, then nonterminals.
2595    // Internal terminals are not limited to grammar.tokens; they may also come
2596    // from literal/punctuation terminals that only appear in rule RHS.
2597    let nonterminal_symbols: BTreeSet<SymbolId> = grammar.rules.keys().copied().collect();
2598    let external_symbols: BTreeSet<SymbolId> =
2599        grammar.externals.iter().map(|e| e.symbol_id).collect();
2600    let mut rhs_terminals: BTreeSet<SymbolId> = BTreeSet::new();
2601    for rule in grammar.all_rules() {
2602        for sym in &rule.rhs {
2603            if let Symbol::Terminal(id) = sym {
2604                rhs_terminals.insert(*id);
2605            }
2606        }
2607    }
2608
2609    // Calculate EOF symbol ID to avoid collisions with all known symbols,
2610    // including RHS-only terminals.
2611    let max_symbol = grammar
2612        .tokens
2613        .keys()
2614        .chain(grammar.rule_names.keys())
2615        .chain(nonterminal_symbols.iter())
2616        .chain(external_symbols.iter())
2617        .chain(rhs_terminals.iter())
2618        .map(|s| s.0)
2619        .max()
2620        .unwrap_or(0);
2621    let eof_symbol = SymbolId(max_symbol.checked_add(1).ok_or_else(|| {
2622        GLRError::StateMachine("EOF symbol would overflow u16: grammar has too many symbols".into())
2623    })?);
2624
2625    // Create augmented grammar with S' -> S $ rule
2626    let mut augmented_grammar = grammar.clone();
2627
2628    // Find the original start symbol
2629    let original_start =
2630        grammar
2631            .start_symbol()
2632            .ok_or(GLRError::GrammarError(GrammarError::UnresolvedSymbol(
2633                SymbolId(0),
2634            )))?;
2635
2636    if let Some(_name) = grammar.rule_names.get(&original_start) {}
2637
2638    // Create a new start symbol S' with an ID that won't conflict with existing symbols
2639    // Since eof_symbol = max_symbol + 1, we use max_symbol + 2 for augmented_start
2640    let augmented_start_id = max_symbol.checked_add(2).ok_or_else(|| {
2641        GLRError::StateMachine(
2642            "Augmented start symbol would overflow u16: grammar has too many symbols".into(),
2643        )
2644    })?;
2645    let augmented_start = SymbolId(augmented_start_id);
2646
2647    // Find the max production ID in the original grammar
2648    let max_production_id = grammar
2649        .all_rules()
2650        .map(|r| r.production_id.0)
2651        .max()
2652        .unwrap_or(0);
2653    let augmented_production_id = max_production_id
2654        .checked_add(1)
2655        .ok_or_else(|| GLRError::StateMachine("Production ID overflow".into()))?;
2656
2657    // Add S' -> S rule (we'll handle $ implicitly in the LR construction)
2658    let augmented_rule = Rule {
2659        lhs: augmented_start,
2660        rhs: vec![Symbol::NonTerminal(original_start)],
2661        precedence: None,
2662        associativity: None,
2663        fields: vec![],
2664        production_id: ProductionId(augmented_production_id),
2665    };
2666    augmented_grammar
2667        .rules
2668        .insert(augmented_start, vec![augmented_rule]);
2669    augmented_grammar
2670        .rule_names
2671        .insert(augmented_start, "$start".to_string());
2672
2673    // Build canonical collection of LR(1) item sets with augmented grammar
2674    let collection = ItemSetCollection::build_canonical_collection_augmented(
2675        &augmented_grammar,
2676        first_follow,
2677        augmented_start,
2678        original_start,
2679        eof_symbol,
2680    );
2681
2682    // Create mapping from symbol IDs to table indices
2683    let mut symbol_to_index = BTreeMap::new();
2684
2685    // IMPORTANT: EOF symbol must always have index 0 in Tree-sitter
2686    symbol_to_index.insert(eof_symbol, 0);
2687
2688    // Group symbols in Tree-sitter order:
2689    // 1. EOF first (index 0)
2690    // 2. Regular tokens (terminals)
2691    // 3. External tokens (terminals)
2692    // 4. Non-terminals last (gotos)
2693
2694    // Internal terminals are:
2695    // (declared tokens ∪ RHS terminals) − nonterminals − externals − EOF.
2696    let mut internal_terminals: BTreeSet<SymbolId> = grammar.tokens.keys().copied().collect();
2697    internal_terminals.extend(rhs_terminals.iter().copied());
2698    internal_terminals.remove(&eof_symbol);
2699    for id in &external_symbols {
2700        internal_terminals.remove(id);
2701    }
2702    for id in &nonterminal_symbols {
2703        internal_terminals.remove(id);
2704    }
2705
2706    let mut internal_tokens: Vec<SymbolId> = internal_terminals.into_iter().collect();
2707    internal_tokens.sort_by_key(|s| s.0);
2708    for &id in &internal_tokens {
2709        if !symbol_to_index.contains_key(&id) {
2710            let idx = symbol_to_index.len();
2711            symbol_to_index.insert(id, idx);
2712        }
2713    }
2714
2715    // Collect and sort external tokens
2716    let mut ext_tokens: Vec<SymbolId> = external_symbols.iter().copied().collect();
2717    ext_tokens.sort_by_key(|s| s.0);
2718    for &id in &ext_tokens {
2719        if !symbol_to_index.contains_key(&id) {
2720            let idx = symbol_to_index.len();
2721            symbol_to_index.insert(id, idx);
2722        }
2723    }
2724
2725    // Collect and sort non-terminals
2726    let mut non_terminals: Vec<SymbolId> = nonterminal_symbols.iter().copied().collect();
2727    non_terminals.sort_by_key(|s| s.0);
2728    for id in non_terminals {
2729        if !symbol_to_index.contains_key(&id) {
2730            let idx = symbol_to_index.len();
2731            symbol_to_index.insert(id, idx);
2732        }
2733    }
2734
2735    // Any remaining symbols indicate partition drift and should be investigated.
2736    let mut other_symbols: Vec<SymbolId> = grammar
2737        .rule_names
2738        .keys()
2739        .cloned()
2740        .filter(|id| !symbol_to_index.contains_key(id))
2741        .collect();
2742    other_symbols.sort_by_key(|s| s.0);
2743    if !other_symbols.is_empty() {
2744        return Err(GLRError::StateMachine(format!(
2745            "Unexpected symbols outside terminal/nonterminal partitions: {:?}",
2746            other_symbols
2747        )));
2748    }
2749
2750    // Calculate the final symbol count after adding all symbols including EOF
2751    let indexed_symbol_count = symbol_to_index.len();
2752
2753    // Create parse table with proper dimensions
2754    let state_count = collection.sets.len();
2755    let symbol_count = indexed_symbol_count; // Keep for compatibility
2756
2757    let mut action_table = vec![vec![Vec::new(); indexed_symbol_count]; state_count];
2758    let mut goto_table = vec![vec![StateId(0); indexed_symbol_count]; state_count];
2759
2760    // Track conflicts as we build the table
2761    let mut conflicts_by_state: BTreeMap<(usize, usize), Vec<Action>> = BTreeMap::new();
2762
2763    // Build rules for reduction and collect precedence info
2764    let mut rules = Vec::new();
2765    let mut dynamic_prec_by_rule = Vec::new();
2766    let mut rule_assoc_by_rule = Vec::new();
2767    let mut production_to_rule_id = BTreeMap::new();
2768
2769    for (rule_id, rule) in grammar.all_rules().enumerate() {
2770        production_to_rule_id.insert(rule.production_id.0, rule_id as u16);
2771        rules.push(ParseRule {
2772            lhs: rule.lhs,
2773            rhs_len: rule.rhs.len() as u16,
2774        });
2775
2776        // Extract precedence value for this rule
2777        let prec = match rule.precedence {
2778            Some(adze_ir::PrecedenceKind::Static(p)) => p,
2779            Some(adze_ir::PrecedenceKind::Dynamic(p)) => p,
2780            None => 0,
2781        };
2782        dynamic_prec_by_rule.push(prec);
2783
2784        // Extract associativity for this rule
2785        let assoc = match rule.associativity {
2786            Some(adze_ir::Associativity::Left) => 1,
2787            Some(adze_ir::Associativity::Right) => -1,
2788            _ => 0,
2789        };
2790        rule_assoc_by_rule.push(assoc);
2791    }
2792
2793    // Debug: Print goto table entries
2794    debug_trace!(
2795        "DEBUG: Collection goto table has {} entries",
2796        collection.goto_table.len()
2797    );
2798    debug_trace!(
2799        "DEBUG: Augmented grammar has {} tokens",
2800        augmented_grammar.tokens.len()
2801    );
2802
2803    // Debug: Print what tokens are in the augmented grammar
2804    debug_trace!("=== Symbol Classification Debug ===");
2805    debug_trace!(
2806        "Tokens in augmented_grammar: {:?}",
2807        augmented_grammar
2808            .tokens
2809            .keys()
2810            .map(|k| k.0)
2811            .collect::<Vec<_>>()
2812    );
2813    debug_trace!(
2814        "Externals in augmented_grammar: {:?}",
2815        augmented_grammar
2816            .externals
2817            .iter()
2818            .map(|e| e.symbol_id.0)
2819            .collect::<Vec<_>>()
2820    );
2821    debug_trace!("Original grammar tokens: {}", grammar.tokens.len());
2822    debug_trace!(
2823        "Collection goto_table size: {}",
2824        collection.goto_table.len()
2825    );
2826
2827    // Debug state 0 specifically
2828    let state0_gotos: Vec<_> = collection
2829        .goto_table
2830        .iter()
2831        .filter(|((from, _), _)| from.0 == 0)
2832        .collect();
2833    debug_trace!("State 0 has {} goto entries", state0_gotos.len());
2834    for ((_, _symbol), _to_state) in &state0_gotos {
2835        debug_trace!("  Symbol {} -> State {}", _symbol.0, _to_state.0);
2836    }
2837
2838    // First, add shift actions from goto table for terminals
2839    // This must be done BEFORE reduce actions to enable shift/reduce conflict detection
2840    let mut _terminal_count = 0;
2841    let mut _non_terminal_count = 0;
2842
2843    for ((from_state, symbol), to_state) in &collection.goto_table {
2844        // Check if this symbol is a terminal using the tracking from collection
2845        let is_terminal = collection
2846            .symbol_is_terminal
2847            .get(symbol)
2848            .copied()
2849            .unwrap_or(*symbol == eof_symbol); // EOF is a terminal
2850
2851        if from_state.0 == 0 {
2852            debug_trace!(
2853                "State 0 goto entry: symbol {} -> state {}, is_terminal={} (in tokens={}, in externals={}, is EOF={})",
2854                symbol.0,
2855                to_state.0,
2856                is_terminal,
2857                augmented_grammar.tokens.contains_key(symbol),
2858                augmented_grammar
2859                    .externals
2860                    .iter()
2861                    .any(|e| e.symbol_id == *symbol),
2862                symbol.0 == 0
2863            );
2864        }
2865
2866        if is_terminal {
2867            _terminal_count += 1;
2868            if let Some(&symbol_idx) = symbol_to_index.get(symbol) {
2869                let state_idx = from_state.0 as usize;
2870                if state_idx < action_table.len() && symbol_idx < action_table[state_idx].len() {
2871                    // Add as a shift action
2872                    let new_action = Action::Shift(*to_state);
2873                    if state_idx == 0 {
2874                        debug_trace!(
2875                            "DEBUG: Adding shift action to state 0: symbol {} (idx={}) -> state {}",
2876                            symbol.0,
2877                            symbol_idx,
2878                            to_state.0
2879                        );
2880                    }
2881                    add_action_with_conflict(
2882                        &mut action_table,
2883                        &mut conflicts_by_state,
2884                        state_idx,
2885                        symbol_idx,
2886                        new_action,
2887                    );
2888                } else if state_idx == 0 {
2889                    debug_trace!(
2890                        "DEBUG: SKIPPING shift for state 0: bounds check failed - state_idx={}, symbol_idx={}, action_table.len={}, inner_len={}",
2891                        state_idx,
2892                        symbol_idx,
2893                        action_table.len(),
2894                        if state_idx < action_table.len() {
2895                            action_table[state_idx].len()
2896                        } else {
2897                            0
2898                        }
2899                    );
2900                }
2901            } else if from_state.0 == 0 {
2902                debug_trace!(
2903                    "DEBUG: Terminal {} not in symbol_to_index for state 0",
2904                    symbol.0
2905                );
2906            }
2907        } else {
2908            _non_terminal_count += 1;
2909        }
2910    }
2911
2912    // Handle "extras" (like comments, whitespace, and external tokens marked as extras).
2913    // In every state, for every "extra" token, if there isn't already a specific
2914    // action, add a self-looping SHIFT action. This allows extras to appear
2915    // anywhere in the grammar without changing the parser's state.
2916    for state_idx in 0..state_count {
2917        for extra_symbol_id in &augmented_grammar.extras {
2918            if let Some(&symbol_idx) = symbol_to_index.get(extra_symbol_id) {
2919                // Check if an action already exists for this extra token in this state.
2920                // Only add self-loop if no action exists yet (empty cell means no action)
2921                if action_table[state_idx][symbol_idx].is_empty() {
2922                    // Add a self-looping shift that stays in the same state
2923                    action_table[state_idx][symbol_idx]
2924                        .push(Action::Shift(StateId(state_idx as u16)));
2925                }
2926            }
2927        }
2928    }
2929
2930    // Now fill action table with reduce actions
2931    for item_set in &collection.sets {
2932        let state_idx = item_set.id.0 as usize;
2933
2934        for item in &item_set.items {
2935            if item.is_reduce_item(&augmented_grammar) {
2936                // Check if this is a reduce by the augmented start rule
2937                if let Some(rule) = augmented_grammar
2938                    .all_rules()
2939                    .find(|r| r.production_id.0 == item.rule_id.0)
2940                    && rule.lhs == augmented_start
2941                {
2942                    if item.lookahead == eof_symbol {
2943                        // This is S' -> S • with lookahead $, add accept action
2944                        if let Some(&eof_idx) = symbol_to_index.get(&eof_symbol) {
2945                            add_action_with_conflict(
2946                                &mut action_table,
2947                                &mut conflicts_by_state,
2948                                state_idx,
2949                                eof_idx,
2950                                Action::Accept,
2951                            );
2952                        }
2953                    }
2954                    // NEVER add a regular reduce action for the augmented start rule
2955                    continue;
2956                }
2957
2958                // Regular reduce action
2959                if let Some(&rule_id) = production_to_rule_id.get(&item.rule_id.0) {
2960                    let rule = &rules[rule_id as usize];
2961                    let is_empty_production = rule.rhs_len == 0;
2962
2963                    // For empty productions, we need to add reduce actions for all symbols in FOLLOW set
2964                    let lookaheads_to_check: Vec<SymbolId> = if is_empty_production {
2965                        // Get FOLLOW set for the LHS of this rule
2966                        if let Some(follow_set) = first_follow.follow(rule.lhs) {
2967                            // Map FOLLOW set symbols to actual parse table symbols.
2968                            // This replaces EOF_SENTINEL (SymbolId(0)) with the actual eof_symbol.
2969                            follow_set
2970                                .ones()
2971                                .map(|idx| map_follow_symbol(SymbolId(idx as u16), eof_symbol))
2972                                .collect()
2973                        } else {
2974                            vec![item.lookahead]
2975                        }
2976                    } else {
2977                        vec![item.lookahead]
2978                    };
2979
2980                    for lookahead in lookaheads_to_check {
2981                        if let Some(&lookahead_idx) = symbol_to_index.get(&lookahead) {
2982                            let new_action = Action::Reduce(RuleId(rule_id));
2983
2984                            // Always add reduce actions - let conflict resolution handle precedence
2985                            add_action_with_conflict(
2986                                &mut action_table,
2987                                &mut conflicts_by_state,
2988                                state_idx,
2989                                lookahead_idx,
2990                                new_action,
2991                            );
2992                        }
2993                    }
2994                }
2995            }
2996            // Note: Shift actions were already added before this loop
2997        }
2998    }
2999
3000    // Shift actions were already added before reduce actions
3001
3002    // Build precedence tables once
3003    let production_count = augmented_grammar.all_rules().count() as u32;
3004    // token_count includes EOF (symbol 0 in table) plus all regular tokens.
3005    let token_count = (internal_tokens.len() + 1) as u32;
3006    let prec_tables = build_prec_tables(
3007        &augmented_grammar,
3008        &symbol_to_index,
3009        token_count,
3010        production_count,
3011    );
3012
3013    // Calculate the first non-terminal index
3014    // Terminals are: EOF + internal tokens + external tokens.
3015    // So first non-terminal is at the terminal boundary.
3016    let first_nonterminal_idx = internal_tokens.len() + ext_tokens.len() + 1;
3017
3018    // Resolve conflicts using precedence
3019    for ((state_idx, symbol_idx), _actions) in conflicts_by_state {
3020        // Guard rail: validate indices
3021        debug_assert!(state_idx < action_table.len(), "state_idx out of bounds");
3022        debug_assert!(
3023            symbol_idx < action_table[0].len(),
3024            "symbol_idx out of bounds"
3025        );
3026
3027        // Only resolve on terminal columns (never on gotos).
3028        // Terminals occupy indices [0, first_nonterminal_idx).
3029        if symbol_idx >= first_nonterminal_idx {
3030            continue; // Skip non-terminal columns
3031        }
3032
3033        let cell = &mut action_table[state_idx][symbol_idx];
3034
3035        // Guard rail: skip empty cells
3036        if cell.is_empty() {
3037            continue;
3038        }
3039
3040        // If ACCEPT is present, keep it alone (canonical LR(1) accept)
3041        if cell.iter().any(|a| matches!(a, Action::Accept)) {
3042            *cell = vec![Action::Accept];
3043            continue;
3044        }
3045
3046        // Extract first shift and the set of reduces in the cell
3047        let first_shift = cell.iter().find_map(|a| {
3048            if let Action::Shift(s) = a {
3049                Some(*s)
3050            } else {
3051                None
3052            }
3053        });
3054        let mut reduces: Vec<u16> = cell
3055            .iter()
3056            .filter_map(|a| {
3057                if let Action::Reduce(pid) = a {
3058                    Some(pid.0)
3059                } else {
3060                    None
3061                }
3062            })
3063            .collect();
3064
3065        // If there are multiple reduces, resolve them first (rule precedence)
3066        if reduces.len() > 1 {
3067            let winner = reduces[1..].iter().fold(reduces[0], |acc, &r| {
3068                decide_reduce_reduce(acc, r, &prec_tables)
3069            });
3070            reduces.clear();
3071            reduces.push(winner);
3072            // keep the non-reduce actions (shift/accept) as-is for now
3073            cell.retain(|a| {
3074                matches!(a, Action::Shift(_)) || matches!(a, Action::Reduce(pid) if pid.0 == winner)
3075            });
3076        }
3077
3078        // Now we have at most one reduce and at most one shift
3079        if let (Some(s), Some(r)) = (first_shift, reduces.first().copied()) {
3080            match decide_with_precedence(symbol_idx, r, &prec_tables) {
3081                PrecDecision::PreferShift => *cell = vec![Action::Shift(s)],
3082                PrecDecision::PreferReduce => *cell = vec![Action::Reduce(RuleId(r))],
3083                PrecDecision::Error => {
3084                    // Non-associative at equal precedence: forbid combination at this lookahead.
3085                    // For GLR you can either force a parse error here or keep both and let runtime err.
3086                    // Common Yacc behavior is to make it a syntax error:
3087                    // *cell = vec![Action::Error];  // Uncomment if you want to make it a hard error
3088                    // For now, keep both for GLR
3089                }
3090                PrecDecision::NoInfo => {
3091                    // For GLR: when no precedence information is available, keep both actions
3092                    // This preserves conflicts for GLR runtime to handle via forking
3093                    // Don't resolve the conflict - let GLR handle it at runtime
3094                }
3095            }
3096        }
3097    }
3098
3099    // Add non-terminal goto entries to the goto table
3100    for ((from_state, symbol), _to_state) in &collection.goto_table {
3101        // Check if this symbol is a non-terminal using the tracking from collection
3102        let is_terminal = collection
3103            .symbol_is_terminal
3104            .get(symbol)
3105            .copied()
3106            .unwrap_or(*symbol == eof_symbol); // EOF is a terminal
3107
3108        if !is_terminal && let Some(&symbol_idx) = symbol_to_index.get(symbol) {
3109            let state_idx = from_state.0 as usize;
3110            if state_idx < goto_table.len() && symbol_idx < goto_table[state_idx].len() {
3111                // "DEBUG: Setting goto for state {} non-terminal {} (id={}) -> state {}"
3112            }
3113        }
3114    }
3115
3116    // Fill goto table from collection's goto_table (kept for compatibility)
3117    for ((from_state, symbol), to_state) in &collection.goto_table {
3118        let from_idx = from_state.0 as usize;
3119        if let Some(&symbol_idx) = symbol_to_index.get(symbol) {
3120            goto_table[from_idx][symbol_idx] = *to_state;
3121        }
3122    }
3123
3124    // Post-process is no longer needed with proper augmentation
3125    // The accept action is added when we see S' -> S • with EOF lookahead
3126
3127    // But we still need to handle the original grammar's symbol mapping
3128    if let Some(_start_symbol) = grammar.start_symbol() {
3129        // Find all states and check if they need EOF reduce actions
3130        for (state_idx, _item_set) in collection.sets.iter().enumerate() {
3131            // Skip this post-processing - handled by augmentation
3132            let needs_eof_reduce = false;
3133            let reduce_rule_id: Option<RuleId> = None;
3134
3135            // If we found a reduce item that needs EOF action, ensure it's in the action table
3136            if needs_eof_reduce
3137                && let Some(rule_id) = reduce_rule_id
3138                && let Some(&eof_idx) = symbol_to_index.get(&SymbolId(0))
3139            {
3140                // Check if EOF action already exists
3141                if action_table[state_idx][eof_idx].is_empty() {
3142                    action_table[state_idx][eof_idx].push(Action::Reduce(rule_id));
3143                }
3144            }
3145        }
3146    }
3147
3148    // Build symbol metadata
3149    let mut symbol_metadata = Vec::new();
3150
3151    // Add terminal symbols
3152    for (symbol_id, token) in &grammar.tokens {
3153        symbol_metadata.push(SymbolMetadata {
3154            name: token.name.clone(),
3155            is_visible: !token.name.starts_with('_'),
3156            is_named: !matches!(&token.pattern, TokenPattern::String(_)),
3157            is_supertype: false,
3158            // Additional fields required by API contracts
3159            is_terminal: true,
3160            is_extra: grammar.extras.contains(symbol_id),
3161            is_fragile: false, // TODO: implement fragile token detection
3162            symbol_id: *symbol_id,
3163        });
3164    }
3165
3166    // Add non-terminal symbols
3167    for symbol_id in grammar.rules.keys() {
3168        let is_supertype = grammar.supertypes.contains(symbol_id);
3169        symbol_metadata.push(SymbolMetadata {
3170            name: format!("rule_{}", symbol_id.0),
3171            is_visible: true,
3172            is_named: true,
3173            is_supertype,
3174            // Additional fields required by API contracts
3175            is_terminal: false,
3176            is_extra: false,   // non-terminals are never extra
3177            is_fragile: false, // TODO: implement fragile token detection
3178            symbol_id: *symbol_id,
3179        });
3180    }
3181
3182    // Add external symbols
3183    for external in &grammar.externals {
3184        symbol_metadata.push(SymbolMetadata {
3185            name: external.name.clone(),
3186            is_visible: !external.name.starts_with('_'),
3187            is_named: true,
3188            is_supertype: false,
3189            // Additional fields required by API contracts
3190            is_terminal: true,             // external symbols are terminals
3191            is_extra: false,               // TODO: check if external symbol is in extras
3192            is_fragile: false,             // TODO: implement fragile token detection
3193            symbol_id: external.symbol_id, // use external symbol ID
3194        });
3195    }
3196
3197    // Compute external scanner states
3198    // For each state, determine which external tokens are valid
3199    // Now we only track validity - transitions are in the main action table
3200    let mut external_scanner_states =
3201        vec![vec![false; augmented_grammar.externals.len()]; state_count];
3202
3203    // Create a mapping from external symbol_id to index
3204    let mut external_symbol_to_idx = BTreeMap::new();
3205    for (idx, external) in augmented_grammar.externals.iter().enumerate() {
3206        external_symbol_to_idx.insert(external.symbol_id, idx);
3207    }
3208
3209    // Determine which external tokens are valid in each state
3210    // An external token is valid if there's a shift action for it in that state
3211    for state_idx in 0..state_count {
3212        for (external_idx, external) in augmented_grammar.externals.iter().enumerate() {
3213            // Check if this external has a shift action in this state
3214            if let Some(&symbol_idx) = symbol_to_index.get(&external.symbol_id) {
3215                // Check if any action in the cell is a shift
3216                if action_table[state_idx][symbol_idx]
3217                    .iter()
3218                    .any(|a| matches!(a, Action::Shift(_)))
3219                {
3220                    external_scanner_states[state_idx][external_idx] = true;
3221                }
3222            }
3223        }
3224    }
3225
3226    // Build nonterminal_to_index mapping
3227    let mut nonterminal_to_index = BTreeMap::new();
3228    for (&symbol_id, &idx) in &symbol_to_index {
3229        if nonterminal_symbols.contains(&symbol_id) {
3230            nonterminal_to_index.insert(symbol_id, idx);
3231        }
3232    }
3233
3234    let _rule_count = rules.len();
3235
3236    // Calculate proper counts for EOF symbol
3237    // token_count includes EOF (Symbol 0 in table) + all internal tokens
3238    let token_count = internal_tokens.len() + 1;
3239    let external_token_count = ext_tokens.len();
3240
3241    // Normalize action table for deterministic output
3242    normalize_action_table(&mut action_table);
3243
3244    // Build reverse map once, keep the source of truth inside the table.
3245    let mut index_to_symbol = vec![SymbolId(u16::MAX); symbol_to_index.len()];
3246    for (sym, &idx) in &symbol_to_index {
3247        index_to_symbol[idx] = *sym;
3248    }
3249
3250    let mut table = ParseTable {
3251        action_table,
3252        goto_table,
3253        symbol_metadata,
3254        state_count,
3255        symbol_count,
3256        symbol_to_index,
3257        index_to_symbol,
3258        external_scanner_states,
3259        rules,
3260        nonterminal_to_index,
3261        goto_indexing: GotoIndexing::NonterminalMap, // Will be auto-detected
3262        eof_symbol,
3263        start_symbol: original_start,
3264        grammar: grammar.clone(),
3265        initial_state: StateId(0), // Default initial state
3266        token_count,
3267        external_token_count,
3268        lex_modes: vec![
3269            LexMode {
3270                lex_state: 0,
3271                external_lex_state: 0
3272            };
3273            state_count
3274        ],
3275        extras: vec![],             // TODO: Get from grammar metadata
3276        dynamic_prec_by_rule,       // Now properly populated from grammar rules
3277        rule_assoc_by_rule,         // Now properly populated from grammar rules
3278        alias_sequences: vec![],    // TODO: Get from grammar
3279        field_names: vec![],        // TODO: Get from grammar
3280        field_map: BTreeMap::new(), // TODO: Get from grammar
3281    };
3282
3283    // Auto-detect GOTO indexing mode
3284    table.detect_goto_indexing();
3285
3286    Ok(table)
3287}
3288
3289/// Sanity check parse table for correctness
3290#[must_use = "validation result must be checked"]
3291pub fn sanity_check_tables(pt: &ParseTable) -> Result<(), String> {
3292    // 1) ACCEPT must exist on EOF in the state that has S'→S•.
3293    let eof_col = pt
3294        .symbol_to_index
3295        .get(&pt.eof_symbol)
3296        .ok_or_else(|| format!("EOF symbol {} not in symbol_to_index", pt.eof_symbol.0))?;
3297
3298    let accept_somewhere = pt.action_table.iter().any(|row| {
3299        row.get(*eof_col)
3300            .and_then(|cell| cell.iter().find(|a| matches!(a, Action::Accept)))
3301            .is_some()
3302    });
3303    if !accept_somewhere {
3304        return Err("No ACCEPT on EOF found in action table".to_string());
3305    }
3306
3307    // 2) Every production's LHS must be reachable via some goto.
3308    for pid in 0..pt.rules.len() {
3309        let lhs = pt.rules[pid].lhs;
3310        let lhs_idx = pt
3311            .symbol_to_index
3312            .get(&lhs)
3313            .ok_or_else(|| format!("LHS symbol {} not in symbol_to_index", lhs.0))?;
3314
3315        // LHS must be a non-terminal column
3316        if *lhs_idx < pt.token_count {
3317            return Err(format!(
3318                "LHS must be a non-terminal column (pid={}, lhs_idx={}, token_count={})",
3319                pid, lhs_idx, pt.token_count
3320            ));
3321        }
3322
3323        let any = pt
3324            .goto_table
3325            .iter()
3326            .any(|row| row.get(*lhs_idx).is_some_and(|s| s.0 != 0));
3327        if !any {
3328            return Err(format!("No goto(state, lhs(pid={})) present", pid));
3329        }
3330    }
3331
3332    // 3) Verify index_to_symbol is consistent with symbol_to_index
3333    for (sym, &idx) in &pt.symbol_to_index {
3334        if idx >= pt.index_to_symbol.len() {
3335            return Err(format!(
3336                "symbol_to_index has index {} but index_to_symbol has length {}",
3337                idx,
3338                pt.index_to_symbol.len()
3339            ));
3340        }
3341        if pt.index_to_symbol[idx] != *sym {
3342            return Err(format!(
3343                "index_to_symbol[{}] = {} but should be {}",
3344                idx, pt.index_to_symbol[idx].0, sym.0
3345            ));
3346        }
3347    }
3348
3349    Ok(())
3350}
3351
3352/// Add an action to the parse table, tracking conflicts
3353fn add_action_with_conflict(
3354    action_table: &mut Vec<Vec<ActionCell>>,
3355    conflicts_by_state: &mut BTreeMap<(usize, usize), Vec<Action>>,
3356    state_idx: usize,
3357    symbol_idx: usize,
3358    new_action: Action,
3359) {
3360    // Bounds check
3361    if state_idx >= action_table.len() || symbol_idx >= action_table[0].len() {
3362        panic!(
3363            "Index out of bounds in add_action_with_conflict: state_idx={}, symbol_idx={}, table_size={}x{}",
3364            state_idx,
3365            symbol_idx,
3366            action_table.len(),
3367            if action_table.is_empty() {
3368                0
3369            } else {
3370                action_table[0].len()
3371            }
3372        );
3373    }
3374
3375    let current_cell = &mut action_table[state_idx][symbol_idx];
3376
3377    // Check if this action already exists
3378    if !current_cell.iter().any(|a| action_eq(a, &new_action)) {
3379        // Add the action to the cell
3380        current_cell.push(new_action.clone());
3381
3382        // If there are now multiple actions, track as a conflict
3383        if current_cell.len() > 1 {
3384            let entry = conflicts_by_state
3385                .entry((state_idx, symbol_idx))
3386                .or_default();
3387            *entry = current_cell.clone();
3388        }
3389    }
3390}
3391
3392/// Build LR(1) automaton using the GlrResult type alias
3393///
3394/// This is a convenience wrapper that uses the crate-level Result type.
3395/// Use this when migrating code to the new error handling pattern.
3396pub fn build_lr1_automaton_res(
3397    grammar: &Grammar,
3398    first_follow: &FirstFollowSets,
3399) -> GlrResult<ParseTable> {
3400    build_lr1_automaton(grammar, first_follow)
3401}
3402
3403/// Check if two actions are equivalent
3404fn action_eq(a: &Action, b: &Action) -> bool {
3405    match (a, b) {
3406        (Action::Shift(s1), Action::Shift(s2)) => s1 == s2,
3407        (Action::Reduce(r1), Action::Reduce(r2)) => r1 == r2,
3408        (Action::Accept, Action::Accept) => true,
3409        (Action::Error, Action::Error) => true,
3410        (Action::Fork(a1), Action::Fork(a2)) => {
3411            a1.len() == a2.len() && a1.iter().zip(a2).all(|(x, y)| action_eq(x, y))
3412        }
3413        _ => false,
3414    }
3415}
3416
3417#[cfg(test)]
3418mod tests {
3419    use super::*;
3420
3421    #[test]
3422    fn test_lr_item_creation() {
3423        let item = LRItem::new(RuleId(1), 2, SymbolId(3));
3424        assert_eq!(item.rule_id, RuleId(1));
3425        assert_eq!(item.position, 2);
3426        assert_eq!(item.lookahead, SymbolId(3));
3427    }
3428
3429    #[test]
3430    fn test_lr_item_equality() {
3431        let item1 = LRItem::new(RuleId(1), 2, SymbolId(3));
3432        let item2 = LRItem::new(RuleId(1), 2, SymbolId(3));
3433        let item3 = LRItem::new(RuleId(1), 3, SymbolId(3));
3434
3435        assert_eq!(item1, item2);
3436        assert_ne!(item1, item3);
3437
3438        // Test hashing
3439        let mut set = std::collections::BTreeSet::new();
3440        set.insert(item1.clone());
3441        assert!(set.contains(&item1));
3442        assert!(set.contains(&item2));
3443        assert!(!set.contains(&item3));
3444    }
3445
3446    #[test]
3447    fn test_item_set_creation() {
3448        let mut item_set = ItemSet::new(StateId(0));
3449        let item = LRItem::new(RuleId(1), 0, SymbolId(0));
3450        item_set.add_item(item.clone());
3451
3452        assert_eq!(item_set.id, StateId(0));
3453        assert!(item_set.items.contains(&item));
3454        assert_eq!(item_set.items.len(), 1);
3455    }
3456
3457    #[test]
3458    fn test_item_set_duplicate_items() {
3459        let mut item_set = ItemSet::new(StateId(0));
3460        let item = LRItem::new(RuleId(1), 0, SymbolId(0));
3461
3462        item_set.add_item(item.clone());
3463        item_set.add_item(item.clone()); // Add same item again
3464
3465        // Should only contain one item (no duplicates)
3466        assert_eq!(item_set.items.len(), 1);
3467    }
3468
3469    #[test]
3470    fn test_first_follow_empty_grammar() {
3471        let grammar = Grammar::new("test".to_string());
3472        let first_follow = FirstFollowSets::compute(&grammar).unwrap();
3473
3474        assert!(first_follow.first.is_empty());
3475        assert!(first_follow.follow.is_empty());
3476    }
3477
3478    #[test]
3479    fn test_first_follow_simple_grammar() {
3480        let mut grammar = Grammar::new("test".to_string());
3481
3482        // Add a simple rule: S -> a
3483        let rule = Rule {
3484            lhs: SymbolId(0),                         // S
3485            rhs: vec![Symbol::Terminal(SymbolId(1))], // a
3486            precedence: None,
3487            associativity: None,
3488            fields: vec![],
3489            production_id: ProductionId(0),
3490        };
3491        grammar.rules.entry(SymbolId(0)).or_default().push(rule);
3492
3493        // Add the terminal token
3494        let token = Token {
3495            name: "a".to_string(),
3496            pattern: TokenPattern::String("a".to_string()),
3497            fragile: false,
3498        };
3499        grammar.tokens.insert(SymbolId(1), token);
3500
3501        let first_follow = FirstFollowSets::compute(&grammar).unwrap();
3502
3503        // FIRST(S) should contain 'a'
3504        assert!(first_follow.first.contains_key(&SymbolId(0)));
3505        if let Some(first_s) = first_follow.first(SymbolId(0)) {
3506            assert!(first_s.contains(1)); // Terminal 'a' has id 1
3507        }
3508
3509        // S should not be nullable
3510        assert!(!first_follow.is_nullable(SymbolId(0)));
3511    }
3512
3513    #[test]
3514    fn test_first_follow_nullable_rule() {
3515        let mut grammar = Grammar::new("test".to_string());
3516
3517        // Add a rule: S -> ε (empty rule)
3518        let rule = Rule {
3519            lhs: SymbolId(0), // S
3520            rhs: vec![],      // empty
3521            precedence: None,
3522            associativity: None,
3523            fields: vec![],
3524            production_id: ProductionId(0),
3525        };
3526        grammar.rules.entry(SymbolId(0)).or_default().push(rule);
3527
3528        let first_follow = FirstFollowSets::compute(&grammar).unwrap();
3529
3530        // S should be nullable
3531        assert!(first_follow.is_nullable(SymbolId(0)));
3532    }
3533
3534    #[test]
3535    fn test_first_of_sequence() {
3536        let mut grammar = Grammar::new("test".to_string());
3537
3538        // Add tokens
3539        let token_a = Token {
3540            name: "a".to_string(),
3541            pattern: TokenPattern::String("a".to_string()),
3542            fragile: false,
3543        };
3544        grammar.tokens.insert(SymbolId(1), token_a);
3545
3546        let token_b = Token {
3547            name: "b".to_string(),
3548            pattern: TokenPattern::String("b".to_string()),
3549            fragile: false,
3550        };
3551        grammar.tokens.insert(SymbolId(2), token_b);
3552
3553        let first_follow = FirstFollowSets::compute(&grammar).unwrap();
3554
3555        // Test FIRST of sequence [a, b]
3556        let sequence = vec![Symbol::Terminal(SymbolId(1)), Symbol::Terminal(SymbolId(2))];
3557        let first_seq = first_follow.first_of_sequence(&sequence).unwrap();
3558
3559        // Should contain only 'a' (first terminal)
3560        assert!(first_seq.contains(1));
3561        assert!(!first_seq.contains(2));
3562    }
3563
3564    #[test]
3565    fn test_action_types() {
3566        let shift = Action::Shift(StateId(1));
3567        let reduce = Action::Reduce(RuleId(2));
3568        let accept = Action::Accept;
3569        let error = Action::Error;
3570        let fork = Action::Fork(vec![shift.clone(), reduce.clone()]);
3571
3572        match shift {
3573            Action::Shift(StateId(1)) => {}
3574            _ => panic!("Expected shift action"),
3575        }
3576
3577        match reduce {
3578            Action::Reduce(RuleId(2)) => {}
3579            _ => panic!("Expected reduce action"),
3580        }
3581
3582        match accept {
3583            Action::Accept => {}
3584            _ => panic!("Expected accept action"),
3585        }
3586
3587        match error {
3588            Action::Error => {}
3589            _ => panic!("Expected error action"),
3590        }
3591
3592        match fork {
3593            Action::Fork(actions) => {
3594                assert_eq!(actions.len(), 2);
3595                assert_eq!(actions[0], shift);
3596                assert_eq!(actions[1], reduce);
3597            }
3598            _ => panic!("Expected fork action"),
3599        }
3600    }
3601
3602    #[test]
3603    fn test_action_equality() {
3604        let shift1 = Action::Shift(StateId(1));
3605        let shift2 = Action::Shift(StateId(1));
3606        let shift3 = Action::Shift(StateId(2));
3607
3608        assert_eq!(shift1, shift2);
3609        assert_ne!(shift1, shift3);
3610
3611        let reduce1 = Action::Reduce(RuleId(1));
3612        let reduce2 = Action::Reduce(RuleId(1));
3613
3614        assert_eq!(reduce1, reduce2);
3615        assert_ne!(shift1, reduce1);
3616    }
3617
3618    #[test]
3619    fn test_symbol_metadata() {
3620        let metadata = SymbolMetadata {
3621            name: "expression".to_string(),
3622            is_visible: true,
3623            is_named: true,
3624            is_supertype: false,
3625            // Additional fields required by API contracts
3626            is_terminal: false,
3627            is_extra: false,
3628            is_fragile: false,
3629            symbol_id: SymbolId(1),
3630        };
3631
3632        assert_eq!(metadata.name, "expression");
3633        assert!(metadata.is_visible);
3634        assert!(metadata.is_named);
3635        assert!(!metadata.is_supertype);
3636        assert!(!metadata.is_terminal);
3637        assert!(!metadata.is_extra);
3638        assert!(!metadata.is_fragile);
3639        assert_eq!(metadata.symbol_id, SymbolId(1));
3640    }
3641
3642    #[test]
3643    fn test_conflict_types() {
3644        let shift_reduce = ConflictType::ShiftReduce;
3645        let reduce_reduce = ConflictType::ReduceReduce;
3646
3647        assert_eq!(shift_reduce, ConflictType::ShiftReduce);
3648        assert_eq!(reduce_reduce, ConflictType::ReduceReduce);
3649        assert_ne!(shift_reduce, reduce_reduce);
3650    }
3651
3652    #[test]
3653    fn test_conflict_creation() {
3654        let conflict = Conflict {
3655            state: StateId(5),
3656            symbol: SymbolId(10),
3657            actions: vec![Action::Shift(StateId(1)), Action::Reduce(RuleId(2))],
3658            conflict_type: ConflictType::ShiftReduce,
3659        };
3660
3661        assert_eq!(conflict.state, StateId(5));
3662        assert_eq!(conflict.symbol, SymbolId(10));
3663        assert_eq!(conflict.actions.len(), 2);
3664        assert_eq!(conflict.conflict_type, ConflictType::ShiftReduce);
3665    }
3666
3667    #[test]
3668    fn test_conflict_resolver_creation() {
3669        let resolver = ConflictResolver { conflicts: vec![] };
3670
3671        assert!(resolver.conflicts.is_empty());
3672    }
3673
3674    #[test]
3675    fn test_parse_table_creation() {
3676        let parse_table = ParseTable {
3677            action_table: vec![vec![vec![Action::Error]; 5]; 3], // 3 states, 5 symbols
3678            goto_table: vec![vec![StateId(0); 5]; 3],
3679            symbol_metadata: vec![],
3680            state_count: 3,
3681            symbol_count: 5,
3682            symbol_to_index: BTreeMap::new(),
3683            index_to_symbol: vec![],
3684            external_scanner_states: vec![],
3685            rules: vec![],
3686            nonterminal_to_index: BTreeMap::new(),
3687            goto_indexing: GotoIndexing::NonterminalMap,
3688            eof_symbol: SymbolId(0),
3689            start_symbol: SymbolId(1),
3690            grammar: Grammar::new("test".to_string()),
3691            initial_state: StateId(0),
3692            token_count: 3,
3693            external_token_count: 0,
3694            lex_modes: vec![
3695                LexMode {
3696                    lex_state: 0,
3697                    external_lex_state: 0
3698                };
3699                3
3700            ],
3701            extras: vec![],
3702            dynamic_prec_by_rule: vec![],
3703            rule_assoc_by_rule: vec![],
3704            alias_sequences: vec![],
3705            field_names: vec![],
3706            field_map: BTreeMap::new(),
3707        };
3708
3709        assert_eq!(parse_table.state_count, 3);
3710        assert_eq!(parse_table.symbol_count, 5);
3711        assert_eq!(parse_table.action_table.len(), 3);
3712        assert_eq!(parse_table.goto_table.len(), 3);
3713        assert_eq!(parse_table.action_table[0].len(), 5);
3714        assert_eq!(parse_table.goto_table[0].len(), 5);
3715    }
3716
3717    #[test]
3718    fn test_lr_item_reduce_check() {
3719        let mut grammar = Grammar::new("test".to_string());
3720
3721        // Add a rule: S -> a b
3722        let rule = Rule {
3723            lhs: SymbolId(0),
3724            rhs: vec![Symbol::Terminal(SymbolId(1)), Symbol::Terminal(SymbolId(2))],
3725            precedence: None,
3726            associativity: None,
3727            fields: vec![],
3728            production_id: ProductionId(0),
3729        };
3730        grammar.rules.entry(SymbolId(0)).or_default().push(rule);
3731
3732        // Item at position 0: S -> • a b
3733        let item1 = LRItem::new(RuleId(0), 0, SymbolId(0));
3734        assert!(!item1.is_reduce_item(&grammar));
3735
3736        // Item at position 1: S -> a • b
3737        let item2 = LRItem::new(RuleId(0), 1, SymbolId(0));
3738        assert!(!item2.is_reduce_item(&grammar));
3739
3740        // Item at position 2: S -> a b •
3741        let item3 = LRItem::new(RuleId(0), 2, SymbolId(0));
3742        assert!(item3.is_reduce_item(&grammar));
3743    }
3744
3745    #[test]
3746    fn test_lr_item_next_symbol() {
3747        let mut grammar = Grammar::new("test".to_string());
3748
3749        // Add a rule: S -> a b
3750        let rule = Rule {
3751            lhs: SymbolId(0),
3752            rhs: vec![Symbol::Terminal(SymbolId(1)), Symbol::Terminal(SymbolId(2))],
3753            precedence: None,
3754            associativity: None,
3755            fields: vec![],
3756            production_id: ProductionId(0),
3757        };
3758        grammar.rules.entry(SymbolId(0)).or_default().push(rule);
3759
3760        // Item at position 0: S -> • a b
3761        let item1 = LRItem::new(RuleId(0), 0, SymbolId(0));
3762        if let Some(symbol) = item1.next_symbol(&grammar) {
3763            match symbol {
3764                Symbol::Terminal(SymbolId(1)) => {}
3765                _ => panic!("Expected terminal symbol with id 1"),
3766            }
3767        } else {
3768            panic!("Expected next symbol");
3769        }
3770
3771        // Item at position 1: S -> a • b
3772        let item2 = LRItem::new(RuleId(0), 1, SymbolId(0));
3773        if let Some(symbol) = item2.next_symbol(&grammar) {
3774            match symbol {
3775                Symbol::Terminal(SymbolId(2)) => {}
3776                _ => panic!("Expected terminal symbol with id 2"),
3777            }
3778        } else {
3779            panic!("Expected next symbol");
3780        }
3781
3782        // Item at position 2: S -> a b •
3783        let item3 = LRItem::new(RuleId(0), 2, SymbolId(0));
3784        assert!(item3.next_symbol(&grammar).is_none());
3785    }
3786
3787    #[test]
3788    fn test_item_set_collection_creation() {
3789        let collection = ItemSetCollection {
3790            sets: vec![],
3791            goto_table: IndexMap::new(),
3792            symbol_is_terminal: IndexMap::new(),
3793        };
3794
3795        assert!(collection.sets.is_empty());
3796        assert!(collection.goto_table.is_empty());
3797    }
3798
3799    #[test]
3800    fn test_glr_error_types() {
3801        let grammar_error = GLRError::GrammarError(GrammarError::InvalidFieldOrdering);
3802        let conflict_error = GLRError::ConflictResolution("Test conflict".to_string());
3803        let state_error = GLRError::StateMachine("Test state machine error".to_string());
3804
3805        match grammar_error {
3806            GLRError::GrammarError(_) => {}
3807            _ => panic!("Expected grammar error"),
3808        }
3809
3810        match conflict_error {
3811            GLRError::ConflictResolution(msg) => assert_eq!(msg, "Test conflict"),
3812            _ => panic!("Expected conflict resolution error"),
3813        }
3814
3815        match state_error {
3816            GLRError::StateMachine(msg) => assert_eq!(msg, "Test state machine error"),
3817            _ => panic!("Expected state machine error"),
3818        }
3819    }
3820
3821    #[test]
3822    fn test_item_set_equality() {
3823        let mut set1 = ItemSet::new(StateId(0));
3824        let mut set2 = ItemSet::new(StateId(1));
3825
3826        let item1 = LRItem::new(RuleId(1), 0, SymbolId(0));
3827        let item2 = LRItem::new(RuleId(2), 1, SymbolId(1));
3828
3829        set1.add_item(item1.clone());
3830        set1.add_item(item2.clone());
3831
3832        set2.add_item(item1);
3833        set2.add_item(item2);
3834
3835        // Sets should be equal based on items, not ID
3836        assert_eq!(set1.items, set2.items);
3837        assert_ne!(set1.id, set2.id);
3838    }
3839
3840    #[test]
3841    fn test_recursive_fork_normalization() {
3842        // Create a messy nested Fork action
3843        let mut action = Action::Fork(vec![
3844            Action::Fork(vec![
3845                Action::Reduce(RuleId(3)),
3846                Action::Shift(StateId(2)),
3847                Action::Reduce(RuleId(1)),
3848            ]),
3849            Action::Shift(StateId(1)),
3850            Action::Fork(vec![
3851                Action::Accept,
3852                Action::Shift(StateId(4)),
3853                Action::Error,
3854            ]),
3855        ]);
3856
3857        // Normalize it
3858        normalize_action(&mut action);
3859
3860        // Check that inner forks are sorted
3861        if let Action::Fork(ref actions) = action {
3862            // First inner fork should have actions sorted: Shift < Reduce
3863            if let Action::Fork(ref inner) = actions[0] {
3864                assert_eq!(inner.len(), 3);
3865                assert!(matches!(inner[0], Action::Shift(StateId(2))));
3866                assert!(matches!(inner[1], Action::Reduce(RuleId(1))));
3867                assert!(matches!(inner[2], Action::Reduce(RuleId(3))));
3868            }
3869
3870            // Last inner fork should have actions sorted: Shift < Accept < Error
3871            if let Action::Fork(ref inner) = actions[2] {
3872                assert_eq!(inner.len(), 3);
3873                assert!(matches!(inner[0], Action::Shift(StateId(4))));
3874                assert!(matches!(inner[1], Action::Accept));
3875                assert!(matches!(inner[2], Action::Error));
3876            }
3877        } else {
3878            panic!("Expected Fork action");
3879        }
3880    }
3881}