//! gazelle/runtime.rs — runtime support for the generated LR parser.
1use crate::grammar::SymbolId;
2
3// ============================================================================
4// Core types — no alloc needed
5// ============================================================================
6
/// Defines the error type for parser actions.
///
/// Implement this once on your action type, then use `Self::Error` in all
/// `Action::build` return types.
///
/// ```ignore
/// struct Eval;
/// impl gazelle::ErrorType for Eval {
///     type Error = core::convert::Infallible;
/// }
/// ```
pub trait ErrorType {
    /// Error type returned by every `Action::build` on this action type.
    type Error;
}
21
/// Marker trait for generated AST node types.
///
/// Implemented by codegen for each non-terminal enum. Maps the node to its
/// output type (determined by the action type `A` baked into the node's
/// generic parameter).
///
/// The output type determines how reduction works:
/// - `Output = N` (identity): CST mode, node passes through unchanged
/// - `Output = Ignore`: node is discarded
/// - `Output = Box<N>`: node is auto-boxed for recursive types
/// - Any other type: custom reduction via `Action` impl
pub trait AstNode {
    /// Value produced when a node of this type is reduced.
    type Output;
}
36
/// Convert a grammar node to an output value.
///
/// Blanket implementations cover identity, `Ignore`, and `Box<N>`.
/// Bounded on `AstNode` so that `Ignore` and `Box<N>` (which don't implement
/// `AstNode`) can't cause overlap with the identity impl.
#[doc(hidden)]
pub trait FromAstNode<N: AstNode> {
    /// Consume `node` and produce the target value.
    fn from(node: N) -> Self;
}
46
47/// Blanket: identity — node passes through unchanged (CST mode).
48impl<N: AstNode> FromAstNode<N> for N {
49    fn from(node: N) -> N {
50        node
51    }
52}
53
/// Marker type for discarding a node during reduction.
///
/// Set `type Foo = Ignore` on your `Types` impl to discard nodes of that type.
/// The blanket `Action` impl handles the rest.
///
/// A unit struct, so discarding carries no runtime payload.
#[derive(Debug, Clone, Copy)]
pub struct Ignore;
60
61/// Blanket: ignore — node is discarded.
62impl<N: AstNode> FromAstNode<N> for Ignore {
63    fn from(_: N) -> Self {
64        Ignore
65    }
66}
67
68/// Blanket: auto-box — node is wrapped in `Box`.
69impl<N: AstNode> FromAstNode<N> for alloc::boxed::Box<N> {
70    fn from(node: N) -> alloc::boxed::Box<N> {
71        alloc::boxed::Box::new(node)
72    }
73}
74
/// Reduce a grammar node to its output value.
///
/// A blanket implementation covers any output that implements `FromAstNode<N>`
/// (identity, `Ignore`, `Box<N>`). Custom reductions override this for specific node types.
pub trait Action<N: AstNode>: ErrorType {
    /// Build the node's output; may fail with the action's `Error` type.
    fn build(&mut self, node: N) -> Result<N::Output, Self::Error>;
}
82
83/// Blanket: if `Output: FromAstNode<N>`, build is automatic.
84impl<N: AstNode, A: ErrorType> Action<N> for A
85where
86    N::Output: FromAstNode<N>,
87{
88    fn build(&mut self, node: N) -> Result<N::Output, Self::Error> {
89        Ok(FromAstNode::from(node))
90    }
91}
92
/// An operation instruction in the parse table.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum ParserOp {
    /// Shift the token and go to the given state.
    Shift(usize),
    /// Reduce using the given rule index. Reduce(0) means accept.
    Reduce(usize),
    /// Shift/reduce conflict resolved by precedence at runtime.
    ShiftOrReduce {
        shift_state: usize,
        reduce_rule: usize,
    },
    /// Error (no valid action).
    Error,
}

/// Compact `u32` encoding of a [`ParserOp`] for the displacement tables.
///
/// Layout, interpreting the raw `u32` as an `i32`:
/// - `0`        → `Error`
/// - positive   → `Shift(value)`
/// - negative   → bitwise-NOT of a payload whose low 12 bits are the rule
///   and whose next 19 bits are the shift state (`0` = plain `Reduce`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(transparent)]
pub(crate) struct OpEntry(pub(crate) u32);

impl OpEntry {
    /// Encode a shift to `state`; stored as-is, so it must be positive and
    /// fit in 31 bits to stay positive as an `i32`.
    pub fn shift(state: usize) -> Self {
        debug_assert!(state > 0, "Shift(0) is reserved for Error");
        debug_assert!(state < 0x80000000, "Shift state too large");
        OpEntry(state as u32)
    }

    /// Encode a plain reduce by `rule` (stored as `!rule`, shift field zero).
    pub fn reduce(rule: usize) -> Self {
        debug_assert!(rule < 0x1000, "Reduce rule too large (max 4095)");
        OpEntry(!(rule as u32))
    }

    /// Encode a runtime-resolved shift/reduce pair: rule in the low 12 bits,
    /// shift state in the next 19, with the whole payload bitwise-NOTed so
    /// the entry reads negative as an `i32`.
    pub fn shift_or_reduce(shift_state: usize, reduce_rule: usize) -> Self {
        debug_assert!(shift_state > 0, "Shift(0) is reserved for Error");
        debug_assert!(shift_state < 0x80000, "Shift state too large (max 19 bits)");
        debug_assert!(reduce_rule < 0x1000, "Reduce rule too large (max 4095)");
        OpEntry(!((reduce_rule as u32) | ((shift_state as u32) << 12)))
    }

    /// Decode the entry back into a [`ParserOp`].
    pub fn decode(&self) -> ParserOp {
        let raw = self.0;
        if raw == 0 {
            return ParserOp::Error;
        }
        if (raw as i32) > 0 {
            return ParserOp::Shift(raw as usize);
        }
        // Negative as i32: undo the NOT and unpack the two bit fields.
        let bits = !raw;
        let rule = (bits & 0xFFF) as usize;
        match ((bits >> 12) & 0x7FFFF) as usize {
            0 => ParserOp::Reduce(rule),
            state => ParserOp::ShiftOrReduce {
                shift_state: state,
                reduce_rule: rule,
            },
        }
    }
}
154
/// This is the runtime representation used by the parser. It borrows slices
/// from either static data (generated code) or a [`CompiledTable`](crate::table::CompiledTable).
///
/// Bison-style split base: action_base[state] and goto_base[non_terminal]
/// share the same data/check arrays. Goto is transposed (rows=NTs, cols=states).
#[doc(hidden)]
#[derive(Debug, Clone, Copy)]
pub struct ParseTable<'a> {
    /// Packed payloads: encoded `OpEntry` values for actions, target states for gotos.
    data: &'a [u32],
    /// Ownership array: `check[i]` holds the column that slot `i` belongs to.
    check: &'a [u32],
    /// Per-state displacement into `data`/`check` for terminal actions.
    action_base: &'a [i32],
    /// Per-non-terminal displacement into `data`/`check` for gotos.
    goto_base: &'a [i32],
    /// Per-rule `(lhs symbol ID, rhs length)`.
    rules: &'a [(u32, u8)],
    /// Number of terminal symbols; non-terminal IDs start at this value.
    pub(crate) num_terminals: u32,
    /// Per-state default reduction rule; 0 means "no default" (error).
    default_reduce: &'a [u32],
    /// Per-non-terminal default goto target; `u32::MAX` means "none".
    default_goto: &'a [u32],
}
172
173impl<'a> ParseTable<'a> {
174    /// Create a parse table from borrowed slices.
175    #[allow(clippy::too_many_arguments)]
176    pub const fn new(
177        data: &'a [u32],
178        check: &'a [u32],
179        action_base: &'a [i32],
180        goto_base: &'a [i32],
181        rules: &'a [(u32, u8)],
182        num_terminals: u32,
183        default_reduce: &'a [u32],
184        default_goto: &'a [u32],
185    ) -> Self {
186        ParseTable {
187            data,
188            check,
189            action_base,
190            goto_base,
191            rules,
192            num_terminals,
193            default_reduce,
194            default_goto,
195        }
196    }
197
198    /// Displacement table lookup: data[base[row] + col] if check matches.
199    fn lookup(&self, base: &[i32], row: usize, col: u32) -> Option<u32> {
200        let idx = (base[row] + col as i32) as usize;
201        if idx < self.check.len() && self.check[idx] == col {
202            Some(self.data[idx])
203        } else {
204            None
205        }
206    }
207
208    /// Get the action for a state and terminal. O(1) lookup.
209    pub(crate) fn action(&self, state: usize, terminal: SymbolId) -> ParserOp {
210        if let Some(v) = self.lookup(self.action_base, state, terminal.0) {
211            OpEntry(v).decode()
212        } else {
213            let rule = self.default_reduce[state];
214            if rule > 0 {
215                ParserOp::Reduce(rule as usize)
216            } else {
217                ParserOp::Error
218            }
219        }
220    }
221
222    /// Get the goto state for a state and non-terminal. O(1) lookup.
223    /// Transposed: row = non-terminal index, col = state.
224    pub(crate) fn goto(&self, state: usize, non_terminal: SymbolId) -> Option<usize> {
225        let nt_idx = (non_terminal.0 - self.num_terminals) as usize;
226        if let Some(v) = self.lookup(self.goto_base, nt_idx, state as u32) {
227            Some(v as usize)
228        } else {
229            let default = self.default_goto[nt_idx];
230            if default < u32::MAX {
231                Some(default as usize)
232            } else {
233                None
234            }
235        }
236    }
237
238    /// Get rule info: (lhs symbol ID, rhs length).
239    pub(crate) fn rule_info(&self, rule: usize) -> (SymbolId, usize) {
240        let (lhs, len) = self.rules[rule];
241        (SymbolId(lhs), len as usize)
242    }
243
244    /// Get all rules as (lhs_id, rhs_len) pairs.
245    pub(crate) fn rules(&self) -> &[(u32, u8)] {
246        self.rules
247    }
248}
249
/// Trait for providing error context (symbol names, state/rule info).
///
/// Implemented by [`CompiledTable`](crate::CompiledTable), [`ErrorInfo`](crate::ErrorInfo),
/// and the generated parser's `error_info()` static. Consumed by
/// [`Parser::format_error`] to render detailed syntax-error messages.
pub trait ErrorContext {
    /// Get the name for a symbol ID.
    fn symbol_name(&self, id: SymbolId) -> &str;
    /// Get the accessing symbol for a state (the symbol shifted/reduced to enter it).
    fn state_symbol(&self, state: usize) -> SymbolId;
    /// Get active LR items for a state. Each pair is `(rule_index, dot_position)`:
    /// the dot position is how far into the rule's RHS the parser has progressed.
    fn state_items(&self, state: usize) -> &[(u16, u8)];
    /// Get the RHS symbol IDs for a rule (each ID indexes into `symbol_name`).
    fn rule_rhs(&self, rule: usize) -> &[u32];
}
265
/// Precedence information carried by a token at parse time.
///
/// Used with `prec` terminals to resolve operator precedence at runtime.
/// Higher levels bind tighter. Associativity determines behavior at equal levels.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Precedence {
    /// Left-associative with the given level (e.g., `+`, `-`).
    Left(u8),
    /// Right-associative with the given level (e.g., `=`, `**`).
    Right(u8),
}

impl Precedence {
    /// The numeric binding level, regardless of associativity.
    pub fn level(&self) -> u8 {
        match *self {
            Precedence::Left(level) => level,
            Precedence::Right(level) => level,
        }
    }
}
286
/// How a token resolves shift/reduce conflicts at runtime.
///
/// Carried on a [`Token`] via its `resolution` field.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Resolution {
    /// Compare precedence levels; associativity breaks ties.
    Prec(Precedence),
    /// Explicitly shift (e.g., pair `else` with nearest `if`).
    Shift,
    /// Explicitly reduce.
    Reduce,
}
297
/// Parse error: either a syntax error (unexpected terminal) or a user action error.
///
/// The default type parameter `E = Infallible` means `ParseError` alone represents
/// syntax-only errors (e.g. from the raw `Parser` API). Generated typed parsers
/// return `ParseError<A::Error>` which can also carry action errors.
///
/// The parser remains in a valid state after a syntax error, so you can call
/// `parser.format_error()` to get a detailed error message.
#[derive(Debug, Clone)]
pub enum ParseError<E = core::convert::Infallible> {
    /// Parser encountered an unexpected terminal.
    Syntax {
        /// Symbol ID of the unexpected terminal.
        terminal: SymbolId,
    },
    /// A user action (reduction) returned an error.
    Action(E),
}
313
314impl ParseError<core::convert::Infallible> {
315    /// Cast a syntax-only error (`ParseError<Infallible>`) to any `ParseError<F>`.
316    pub fn cast<F>(self) -> ParseError<F> {
317        match self {
318            ParseError::Syntax { terminal } => ParseError::Syntax { terminal },
319            ParseError::Action(e) => match e {},
320        }
321    }
322}
323
/// A token with terminal symbol ID and optional conflict resolution.
///
/// Create with [`Token::new`] for simple tokens, [`Token::with_prec`]
/// for precedence terminals, or [`Token::with_resolution`] for conflict terminals.
#[derive(Debug, Clone, Copy)]
pub struct Token {
    /// The terminal symbol ID.
    pub terminal: SymbolId,
    /// How this token resolves shift/reduce conflicts, if at all.
    /// `None` for ordinary tokens with no precedence/conflict role.
    pub resolution: Option<Resolution>,
}
335
336impl Token {
337    /// Create a token without resolution info.
338    pub fn new(terminal: SymbolId) -> Self {
339        Self {
340            terminal,
341            resolution: None,
342        }
343    }
344
345    /// Create a token with precedence (for `prec` terminals).
346    pub fn with_prec(terminal: SymbolId, prec: Precedence) -> Self {
347        Self {
348            terminal,
349            resolution: Some(Resolution::Prec(prec)),
350        }
351    }
352
353    /// Create a token with explicit resolution (for `conflict` terminals).
354    pub fn with_resolution(terminal: SymbolId, resolution: Resolution) -> Self {
355        Self {
356            terminal,
357            resolution: Some(resolution),
358        }
359    }
360}
361
362// ============================================================================
363// Alloc-dependent types
364// ============================================================================
365
366use alloc::collections::BTreeSet;
367use alloc::collections::BinaryHeap;
368use alloc::rc::Rc;
369use alloc::string::{String, ToString};
370use alloc::{format, vec, vec::Vec};
371use core::cmp::Reverse;
372
/// Pretty-print generated symbol names: `__foo_star` → `foo*`,
/// `__foo_plus` → `foo+`, `__foo_opt` → `foo?`, `__item_sep_comma` → `item % comma`.
/// Anything else is returned unchanged.
fn format_sym(s: &str) -> String {
    // Names without the generated-symbol prefix pass straight through.
    let rest = match s.strip_prefix("__") {
        Some(rest) => rest,
        None => return s.to_string(),
    };
    // Suffix markers are checked before the separator form (so e.g. a name
    // ending in `_opt` renders as `?` even if it also contains `_sep_`).
    if let Some(base) = rest.strip_suffix("_star") {
        return format!("{}*", base);
    }
    if let Some(base) = rest.strip_suffix("_plus") {
        return format!("{}+", base);
    }
    if let Some(base) = rest.strip_suffix("_opt") {
        return format!("{}?", base);
    }
    if let Some(idx) = rest.find("_sep_") {
        return format!("{} % {}", &rest[..idx], &rest[idx + 5..]);
    }
    // Unrecognized generated name: keep the original spelling, prefix included.
    s.to_string()
}
393
/// Snapshot used during error recovery: a simulated parser state, a token
/// position, and an optional pending repair as `(cost, repair)`.
/// NOTE(review): `SimState` and `Repair` are defined elsewhere in this file —
/// confirm the exact meaning of the tuple fields against their definitions.
type RecoveryState<'a> = (SimState<'a>, usize, Option<(usize, Repair)>);
395
396/// Compute which symbols are nullable (can derive epsilon).
397fn compute_nullable(table: &ParseTable, ctx: &impl ErrorContext) -> Vec<bool> {
398    let rules = table.rules();
399    let num_terminals = table.num_terminals as usize;
400
401    // Find max symbol ID by scanning rules
402    let mut max_sym = num_terminals;
403    for (rule_idx, &(lhs, _)) in rules.iter().enumerate() {
404        let lhs = lhs as usize;
405        if lhs >= max_sym {
406            max_sym = lhs + 1;
407        }
408        for &sym in ctx.rule_rhs(rule_idx) {
409            let id = sym as usize;
410            if id >= max_sym {
411                max_sym = id + 1;
412            }
413        }
414    }
415
416    let mut nullable: Vec<bool> = vec![false; max_sym];
417
418    // Fixed-point iteration
419    let mut changed = true;
420    while changed {
421        changed = false;
422
423        for (rule_idx, &(lhs, _)) in rules.iter().enumerate() {
424            let lhs = lhs as usize;
425            let rhs = ctx.rule_rhs(rule_idx);
426
427            // If RHS is empty or all nullable, LHS is nullable
428            let all_nullable = rhs.iter().all(|&sym| nullable[sym as usize]);
429            if all_nullable && !nullable[lhs] {
430                nullable[lhs] = true;
431                changed = true;
432            }
433        }
434    }
435
436    nullable
437}
438
439/// Collect expected symbols from a sequence, keeping non-nullable nonterminal names.
440/// Nullable nonterminals are expanded to their first non-nullable start symbols.
441fn expected_from_sequence(
442    sequence: &[u32],
443    table: &ParseTable,
444    ctx: &impl ErrorContext,
445    nullable: &[bool],
446    num_terminals: usize,
447) -> BTreeSet<usize> {
448    let mut result = BTreeSet::new();
449    for &sym in sequence {
450        let sym_id = sym as usize;
451        if sym_id < num_terminals || !nullable.get(sym_id).copied().unwrap_or(false) {
452            // Terminal or non-nullable nonterminal: add directly
453            result.insert(sym_id);
454            break;
455        }
456        // Nullable nonterminal: expand to its first non-nullable start symbols
457        expand_nullable(
458            sym_id,
459            table,
460            ctx,
461            nullable,
462            num_terminals,
463            &mut result,
464            &mut BTreeSet::new(),
465        );
466        // Continue to next symbol since this one can be empty
467    }
468    result
469}
470
471/// Expand a nullable nonterminal to its first non-nullable start symbols.
472fn expand_nullable(
473    sym: usize,
474    table: &ParseTable,
475    ctx: &impl ErrorContext,
476    nullable: &[bool],
477    num_terminals: usize,
478    result: &mut BTreeSet<usize>,
479    visited: &mut BTreeSet<usize>,
480) {
481    if !visited.insert(sym) {
482        return;
483    }
484    for (rule_idx, &(lhs, _)) in table.rules().iter().enumerate() {
485        if lhs as usize != sym {
486            continue;
487        }
488        for &s in ctx.rule_rhs(rule_idx) {
489            let s_id = s as usize;
490            if s_id < num_terminals || !nullable.get(s_id).copied().unwrap_or(false) {
491                result.insert(s_id);
492                break;
493            }
494            expand_nullable(s_id, table, ctx, nullable, num_terminals, result, visited);
495        }
496    }
497}
498
/// Check if a sequence is nullable: true when every symbol is nullable
/// (trivially true for an empty sequence). Symbols outside the bitmap's
/// range count as non-nullable.
fn is_sequence_nullable(sequence: &[u32], nullable: &[bool]) -> bool {
    for &sym in sequence {
        if !matches!(nullable.get(sym as usize), Some(true)) {
            return false;
        }
    }
    true
}
505
/// Stack entry for the parser.
#[derive(Debug, Clone, Copy)]
struct StackEntry {
    /// LR automaton state index.
    state: usize,
    /// Precedence context in effect at this entry (set when a `prec` token is
    /// shifted, otherwise carried forward from below).
    prec: Option<Precedence>,
    /// Start token index for this subtree (for span tracking).
    token_idx: usize,
}
515
/// Stack that preserves entries on logical truncation for checkpoint/restore.
/// Physical entries are never removed — truncation only decrements the logical length.
/// This allows restoring the stack to a previous state after spurious reductions.
#[derive(Clone)]
struct LrStack {
    /// Physical buffer; indices `0..len` form the live stack.
    entries: Vec<StackEntry>,
    /// Logical length; maintained `<= entries.len()`.
    len: usize,
}
525
526impl LrStack {
527    fn new() -> Self {
528        Self {
529            entries: Vec::new(),
530            len: 0,
531        }
532    }
533
534    fn len(&self) -> usize {
535        self.len
536    }
537
538    fn push(&mut self, entry: StackEntry) {
539        if self.len < self.entries.len() {
540            self.entries[self.len] = entry;
541        } else {
542            self.entries.push(entry);
543        }
544        self.len += 1;
545    }
546
547    fn truncate(&mut self, new_len: usize) {
548        debug_assert!(new_len <= self.len);
549        self.len = new_len;
550    }
551
552    fn set_len(&mut self, new_len: usize) {
553        debug_assert!(new_len <= self.entries.len());
554        self.len = new_len;
555    }
556
557    fn last(&self) -> Option<&StackEntry> {
558        if self.len > 0 {
559            Some(&self.entries[self.len - 1])
560        } else {
561            None
562        }
563    }
564
565    fn to_vec(&self) -> Vec<StackEntry> {
566        self.entries[..self.len].to_vec()
567    }
568}
569
570impl core::ops::Index<usize> for LrStack {
571    type Output = StackEntry;
572    fn index(&self, idx: usize) -> &StackEntry {
573        debug_assert!(idx < self.len);
574        &self.entries[idx]
575    }
576}
577
/// Push-based LR parser. Call [`maybe_reduce`](Self::maybe_reduce) in a loop,
/// then [`shift`](Self::shift) each token. Rule 0 signals acceptance.
#[derive(Clone)]
pub struct Parser<'a> {
    /// Parse tables (actions, gotos, rule metadata) driving this parser.
    table: ParseTable<'a>,
    /// Current state (top of stack, kept in "register").
    state: StackEntry,
    /// Previous states (rest of stack).
    stack: LrStack,
    /// Count of tokens shifted (for span tracking).
    token_count: usize,
    // Checkpoint for error reporting — the state before the current reduction
    // sequence, restored if an error is detected after spurious reductions.
    /// Register value at the last checkpoint.
    checkpoint_state: StackEntry,
    /// Logical stack length at the last checkpoint.
    checkpoint_len: usize,
    /// Buffered stack slots clobbered since the checkpoint, as
    /// `(index, previous entry)`; replayed in reverse on restore.
    overwrites: Vec<(usize, StackEntry)>,
}
596
597impl<'a> Parser<'a> {
598    /// Create a new parser with the given parse table.
599    pub fn new(table: ParseTable<'a>) -> Self {
600        let initial = StackEntry {
601            state: 0,
602            prec: None,
603            token_idx: 0,
604        };
605        Self {
606            table,
607            state: initial,
608            stack: LrStack::new(),
609            token_count: 0,
610            checkpoint_state: initial,
611            checkpoint_len: 0,
612            overwrites: Vec::new(),
613        }
614    }
615
616    /// Check if a reduction should happen for the given lookahead.
617    ///
618    /// Returns `Ok(Some((rule, len, start_idx)))` if a reduction should occur.
619    /// The `start_idx` together with `token_count()` forms the half-open range `[start_idx, token_count())`.
620    /// Returns `Ok(None)` if should shift or if accepted.
621    /// Returns `Err(ParseError)` on parse error.
622    pub fn maybe_reduce(
623        &mut self,
624        lookahead: Option<Token>,
625    ) -> Result<Option<(usize, usize, usize)>, ParseError> {
626        let terminal = lookahead.map(|t| t.terminal).unwrap_or(SymbolId::EOF);
627        let resolution = lookahead.and_then(|t| t.resolution);
628
629        match self.table.action(self.state.state, terminal) {
630            ParserOp::Reduce(rule) => {
631                if rule == 0 {
632                    Ok(Some((0, 0, 0))) // Accept
633                } else {
634                    let (len, start_idx) = self.do_reduce(rule);
635                    Ok(Some((rule, len, start_idx)))
636                }
637            }
638            ParserOp::Shift(_) => Ok(None),
639            ParserOp::ShiftOrReduce { reduce_rule, .. } => {
640                let should_reduce = match (self.state.prec, resolution) {
641                    (_, Some(Resolution::Reduce)) => true,
642                    (_, Some(Resolution::Shift)) => false,
643                    (Some(sp), Some(Resolution::Prec(tp))) => {
644                        if tp.level() > sp.level() {
645                            false
646                        } else if tp.level() < sp.level() {
647                            true
648                        } else {
649                            matches!(sp, Precedence::Left(_))
650                        }
651                    }
652                    _ => false,
653                };
654
655                if should_reduce {
656                    let (len, start_idx) = self.do_reduce(reduce_rule);
657                    Ok(Some((reduce_rule, len, start_idx)))
658                } else {
659                    Ok(None)
660                }
661            }
662            ParserOp::Error => Err(ParseError::Syntax { terminal }),
663        }
664    }
665
666    /// Shift a token onto the stack.
667    ///
668    /// Call this only after `maybe_reduce` returns `Ok(None)`, meaning no
669    /// more reductions apply for the current lookahead.
670    pub fn shift(&mut self, token: Token) {
671        let next_state = match self.table.action(self.state.state, token.terminal) {
672            ParserOp::Shift(s) => s,
673            ParserOp::ShiftOrReduce { shift_state, .. } => shift_state,
674            _ => panic!("shift called when action is not shift"),
675        };
676
677        let token_prec = match token.resolution {
678            Some(Resolution::Prec(p)) => Some(p),
679            _ => None,
680        };
681        let prec = token_prec.or(self.state.prec);
682        self.stack.push(self.state);
683        self.state = StackEntry {
684            state: next_state,
685            prec,
686            token_idx: self.token_count,
687        };
688        self.token_count += 1;
689        self.save_checkpoint();
690    }
691
    /// Perform the reduction for `rule`: pop its RHS, consult the goto table
    /// from the anchor state, and enter the resulting state.
    /// Returns `(rhs_len, start_token_idx)` for span reporting.
    fn do_reduce(&mut self, rule: usize) -> (usize, usize) {
        let (lhs, len) = self.table.rule_info(rule);

        // Compute start token index for this reduction
        let start_idx = match len {
            0 => self.token_count,     // epsilon: empty range at current position
            1 => self.state.token_idx, // single symbol in register
            _ => self.stack[self.stack.len() - len + 1].token_idx, // first symbol in stack
        };

        if len == 0 {
            // Epsilon: anchor is current state, push it, then set new state
            if let Some(next_state) = self.table.goto(self.state.state, lhs) {
                // Save entry that will be overwritten if within checkpoint range:
                // the push below reuses a buffered slot that restore_checkpoint
                // may still need, so log its previous value first.
                if self.stack.len() < self.checkpoint_len {
                    self.overwrites
                        .push((self.stack.len(), self.stack.entries[self.stack.len()]));
                }
                self.stack.push(self.state);
                self.state = StackEntry {
                    state: next_state,
                    prec: None,
                    token_idx: start_idx,
                };
            }
        } else {
            // Truncate (entries preserved in buffer for checkpoint restore).
            self.stack.truncate(self.stack.len() - (len - 1));
            let anchor = *self.stack.last().unwrap();
            // For single-symbol (len=1): preserve the symbol's own prec (e.g., PLUS → op)
            // For multi-symbol (len>1): use anchor's prec (the "waiting" context)
            // This handles both binary (expr OP expr) and unary (OP expr) correctly.
            let captured_prec = if len == 1 {
                self.state.prec
            } else {
                anchor.prec
            };
            // NOTE(review): a failed goto leaves `state` unchanged while the
            // stack is already truncated — presumably the caller restores the
            // checkpoint in that case; confirm against call sites.
            if let Some(next_state) = self.table.goto(anchor.state, lhs) {
                self.state = StackEntry {
                    state: next_state,
                    prec: captured_prec,
                    token_idx: start_idx,
                };
            }
        }

        (len, start_idx)
    }
740
741    fn save_checkpoint(&mut self) {
742        self.checkpoint_state = self.state;
743        self.checkpoint_len = self.stack.len();
744        self.overwrites.clear();
745    }
746
747    /// Restore parser state to before the current reduction sequence.
748    ///
749    /// When using a runtime grammar, call this after `maybe_reduce` returns
750    /// a reduction you want to handle yourself, to undo the speculative
751    /// stack modifications before continuing.
752    pub fn restore_checkpoint(&mut self) {
753        for &(idx, entry) in self.overwrites.iter().rev() {
754            self.stack.entries[idx] = entry;
755        }
756        self.stack.set_len(self.checkpoint_len);
757        self.state = self.checkpoint_state;
758        self.overwrites.clear();
759    }
760
    /// Get the current LR automaton state (an opaque index into the parse table).
    ///
    /// This is the register value, i.e. the logical top of the stack.
    pub fn state(&self) -> usize {
        self.state.state
    }
765
    /// Get the count of tokens shifted so far.
    ///
    /// Incremented by one on every successful [`shift`](Self::shift).
    pub fn token_count(&self) -> usize {
        self.token_count
    }
770
771    /// Get the LR automaton state at a given stack depth (0 = bottom of stack).
772    pub fn state_at(&self, depth: usize) -> usize {
773        let idx = depth + 1;
774        if idx < self.stack.len() {
775            self.stack[idx].state
776        } else {
777            self.state.state
778        }
779    }
780
781    /// Format a parse error into a detailed message.
782    ///
783    /// Call this after `maybe_reduce` returns an error.
784    ///
785    /// - `display_names`: optional map from grammar names to user-friendly names
786    ///   (e.g., `"SEMI"` → `"';'"`).
787    /// - `tokens`: optional token texts by index (must include the error token at
788    ///   index [`token_count()`](Self::token_count)).
789    ///
790    /// # Adding source locations
791    ///
792    /// The parser is push-based, so the lexer knows the source position when
793    /// each token is pushed. Capture the location before pushing and use it
794    /// when formatting the error:
795    ///
796    /// ```rust,ignore
797    /// loop {
798    ///     let (line, col) = src.line_col();
799    ///     let tok = lex_one_token(&mut src);
800    ///     if let Err(ParseError::Syntax { terminal }) = parser.push(tok, &mut actions) {
801    ///         let msg = parser.format_error(terminal, ctx, None, None);
802    ///         return Err(format!("{line}:{col}: {msg}"));
803    ///     }
804    /// }
805    /// ```
806    pub fn format_error(
807        &self,
808        terminal: SymbolId,
809        ctx: &impl ErrorContext,
810        display_names: Option<&[(&str, &str)]>,
811        tokens: Option<&[&str]>,
812    ) -> String {
813        let display_names = display_names.unwrap_or(&[]);
814        let tokens = tokens.unwrap_or(&[]);
815        // Build full stack for error analysis
816        let mut full_stack: Vec<StackEntry> = self.stack.to_vec();
817        full_stack.push(self.state);
818        let error_token_idx = self.token_count;
819
820        let display = |id: SymbolId| -> &str {
821            let name = ctx.symbol_name(id);
822            display_names
823                .iter()
824                .find(|(k, _)| *k == name)
825                .map(|(_, v)| *v)
826                .unwrap_or(name)
827        };
828
829        // Helper: compute stack spans
830        let stack_spans = || -> Vec<(usize, usize, usize)> {
831            let mut spans = Vec::with_capacity(full_stack.len());
832            for i in 0..full_stack.len() {
833                let start = full_stack[i].token_idx;
834                let end = if i + 1 < full_stack.len() {
835                    full_stack[i + 1].token_idx
836                } else {
837                    error_token_idx
838                };
839                spans.push((start, end, full_stack[i].state));
840            }
841            spans
842        };
843
844        let nullable = compute_nullable(&self.table, ctx);
845        let num_terminals = self.table.num_terminals as usize;
846
847        // Collect relevant items and compute expected symbols
848        let mut relevant_items = Vec::new();
849        self.collect_relevant_items(
850            ctx,
851            self.state.state,
852            self.stack.len() + 1,
853            &mut relevant_items,
854        );
855        let expected_syms = self.compute_expected(ctx, &relevant_items, &nullable, num_terminals);
856
857        // Convert to display names
858        let mut expected: Vec<_> = expected_syms
859            .iter()
860            .map(|&sym| format_sym(display(SymbolId(sym as u32))))
861            .collect();
862        expected.sort();
863
864        // Show actual token text if available, otherwise display name
865        let found_name = tokens
866            .get(error_token_idx)
867            .copied()
868            .unwrap_or_else(|| display(terminal));
869
870        let mut msg = format!("unexpected '{}'", found_name);
871        if !expected.is_empty() {
872            msg.push_str(&format!(", expected: {}", expected.join(", ")));
873        }
874
875        // Show parsed stack with token spans
876        if !tokens.is_empty() && error_token_idx <= tokens.len() {
877            let spans = stack_spans();
878            // Skip state 0 (initial), show recent entries
879            let relevant: Vec<_> = spans
880                .into_iter()
881                .skip(1) // skip initial state
882                .filter(|(start, end, _)| end > start) // skip empty spans
883                .collect();
884
885            if !relevant.is_empty() {
886                // Build two lines: tokens and underlines with names
887                let mut token_line = String::new();
888                let mut label_line = String::new();
889
890                for (start, end, state) in relevant.iter().rev().take(4).rev() {
891                    let sym = ctx.state_symbol(*state);
892                    let name = format_sym(display(sym));
893
894                    // Get token text for this span
895                    let span_text = if end - start == 1 {
896                        tokens[*start].to_string()
897                    } else if end - start <= 3 {
898                        tokens[*start..*end].join(" ")
899                    } else {
900                        format!("{} ... {}", tokens[*start], tokens[end - 1])
901                    };
902
903                    let width = span_text.chars().count().max(name.len());
904
905                    if !token_line.is_empty() {
906                        token_line.push_str("  ");
907                        label_line.push_str("  ");
908                    }
909                    token_line.push_str(&format!("{:^width$}", span_text, width = width));
910                    label_line.push_str(&format!("{:^width$}", name, width = width));
911                }
912
913                msg.push_str(&format!("\n  {}\n  {}", token_line, label_line));
914            }
915        } else if full_stack.len() > 1 {
916            // Fallback: show grammar symbols from stack
917            let path: Vec<_> = full_stack[1..]
918                .iter()
919                .map(|e| display(ctx.state_symbol(e.state)))
920                .collect();
921            msg.push_str(&format!("\n  after: {}", path.join(" ")));
922        }
923
924        // Show informative items that explain what's expected
925        let display_items = &relevant_items;
926        let mut seen = BTreeSet::new();
927
928        for &(rule, dot) in display_items {
929            let rhs = ctx.rule_rhs(rule);
930            let lhs = self.table.rule_info(rule).0;
931            if ctx.symbol_name(lhs) == "__start" {
932                continue;
933            }
934            let lhs_name = format_sym(display(lhs));
935
936            let before: Vec<_> = rhs[..dot]
937                .iter()
938                .map(|&id| format_sym(display(SymbolId(id))))
939                .collect();
940            let after: Vec<_> = rhs[dot..]
941                .iter()
942                .map(|&id| format_sym(display(SymbolId(id))))
943                .collect();
944            let line = format!(
945                "\n  in {}: {} \u{2022} {}",
946                lhs_name,
947                before.join(" "),
948                after.join(" ")
949            );
950            if seen.insert(line.clone()) {
951                msg.push_str(&line);
952            }
953        }
954        msg
955    }
956
957    /// Collect relevant items from the current state.
958    /// Skips dot=0 closure items and __start.
959    /// Items with progress (0 < dot < len) are included directly.
960    /// Complete items (dot=len) trace back to the caller item.
961    fn collect_relevant_items(
962        &self,
963        ctx: &impl ErrorContext,
964        state: usize,
965        stack_len: usize,
966        result: &mut Vec<(usize, usize)>,
967    ) {
968        let mut visited = Vec::new();
969        self.collect_relevant_items_inner(ctx, state, stack_len, result, &mut visited);
970    }
971
    // Recursive worker for `collect_relevant_items`.
    //
    // `stack_len` is the effective parse-stack depth at `state`; it shrinks
    // as completed items are traced back through their callers.
    fn collect_relevant_items_inner(
        &self,
        ctx: &impl ErrorContext,
        state: usize,
        stack_len: usize,
        result: &mut Vec<(usize, usize)>,
        visited: &mut Vec<(usize, usize)>,
    ) {
        // Cycle guard: the same state can recur at the same effective depth
        // while tracing completed items upward.
        if visited.contains(&(state, stack_len)) {
            return;
        }
        visited.push((state, stack_len));

        for &(rule, dot) in ctx.state_items(state) {
            let rule = rule as usize;
            let dot = dot as usize;
            let rhs = ctx.rule_rhs(rule);
            let lhs = self.table.rule_info(rule).0;

            // The synthetic start rule is always recorded (display code
            // filters it out later; expected-symbol computation still uses it).
            if ctx.symbol_name(lhs) == "__start" {
                result.push((rule, dot));
                continue;
            }

            // dot == 0 items are closure noise with no progress: skip.
            if dot == 0 {
                continue;
            }

            if dot < rhs.len() {
                // In-progress item: informative as-is.
                result.push((rule, dot));
            } else {
                // Complete: goto caller state on lhs and recurse
                let consumed = rhs.len();
                if stack_len > consumed {
                    // Caller state sits `consumed` entries below the top; the
                    // goto replaces those entries with a single new one.
                    let caller_state = self.state_at_idx(stack_len - consumed - 1);
                    if let Some(goto_state) = self.table.goto(caller_state, lhs) {
                        self.collect_relevant_items_inner(
                            ctx,
                            goto_state,
                            stack_len - consumed + 1,
                            result,
                            visited,
                        );
                    }
                }
            }
        }
    }
1020
1021    /// Compute expected symbols from relevant items.
1022    fn compute_expected(
1023        &self,
1024        ctx: &impl ErrorContext,
1025        items: &[(usize, usize)],
1026        nullable: &[bool],
1027        num_terminals: usize,
1028    ) -> BTreeSet<usize> {
1029        let mut expected = BTreeSet::new();
1030        let stack_len = self.stack.len() + 1;
1031
1032        for &(rule, dot) in items {
1033            let rhs = ctx.rule_rhs(rule);
1034            let lhs = self.table.rule_info(rule).0;
1035            let suffix = &rhs[dot..];
1036
1037            expected.extend(expected_from_sequence(
1038                suffix,
1039                &self.table,
1040                ctx,
1041                nullable,
1042                num_terminals,
1043            ));
1044
1045            if is_sequence_nullable(suffix, nullable) && stack_len > dot {
1046                expected.extend(self.compute_follow_from_context(
1047                    ctx,
1048                    lhs,
1049                    stack_len - dot,
1050                    nullable,
1051                    num_terminals,
1052                    &mut BTreeSet::new(),
1053                ));
1054            }
1055        }
1056
1057        expected
1058    }
1059
1060    /// Get state at a given stack index (0 = bottom, stack.len() = current state).
1061    fn state_at_idx(&self, idx: usize) -> usize {
1062        if idx < self.stack.len() {
1063            self.stack[idx].state
1064        } else {
1065            self.state.state
1066        }
1067    }
1068
    /// Compute what follows a nonterminal using the stack as calling context.
    ///
    /// Walks up the parse stack to the caller's state and gathers the
    /// terminals that can start whatever appears after `nonterminal` in that
    /// state's items, recursing further up when the trailing part is
    /// nullable. Terminal id 0 stands for EOF throughout.
    fn compute_follow_from_context(
        &self,
        ctx: &impl ErrorContext,
        nonterminal: SymbolId,
        caller_idx: usize,
        nullable: &[bool],
        num_terminals: usize,
        visited: &mut BTreeSet<(usize, u32)>,
    ) -> BTreeSet<usize> {
        // Rule 0 is __start → S, nothing follows __start
        if nonterminal == self.table.rule_info(0).0 {
            let mut result = BTreeSet::new();
            result.insert(0); // EOF
            return result;
        }

        // Ran out of stack context: only EOF can follow.
        if caller_idx == 0 {
            let mut result = BTreeSet::new();
            result.insert(0); // EOF
            return result;
        }

        let caller_state = self.state_at_idx(caller_idx - 1);

        // Use caller_idx in visited key to allow same state at different stack depths
        if !visited.insert((caller_idx, nonterminal.0)) {
            return BTreeSet::new();
        }

        let mut expected = BTreeSet::new();

        // Find items [B → γ • A δ] where A is our nonterminal
        for &(rule, dot) in ctx.state_items(caller_state) {
            let rule = rule as usize;
            let dot = dot as usize;
            let rhs = ctx.rule_rhs(rule);
            if dot < rhs.len() && rhs[dot] == nonterminal.0 {
                let suffix = &rhs[dot + 1..];
                let lhs = self.table.rule_info(rule).0;
                let consumed = dot;

                if suffix.is_empty() {
                    // Nothing after A, follow what follows B
                    if caller_idx > consumed {
                        expected.extend(self.compute_follow_from_context(
                            ctx,
                            lhs,
                            caller_idx - consumed,
                            nullable,
                            num_terminals,
                            visited,
                        ));
                    } else {
                        expected.insert(0);
                    }
                } else {
                    // δ is non-empty: its possible starting terminals follow A.
                    expected.extend(expected_from_sequence(
                        suffix,
                        &self.table,
                        ctx,
                        nullable,
                        num_terminals,
                    ));

                    // If δ can derive nothing, B's own follow set applies too.
                    if is_sequence_nullable(suffix, nullable) {
                        if caller_idx > consumed {
                            expected.extend(self.compute_follow_from_context(
                                ctx,
                                lhs,
                                caller_idx - consumed,
                                nullable,
                                num_terminals,
                                visited,
                            ));
                        } else {
                            expected.insert(0);
                        }
                    }
                }
            }
        }

        expected
    }
1154
1155    /// Try to shift a token, performing any necessary reductions first.
1156    /// Returns a cloned parser in the new state, or None if the token causes an error.
1157    pub(crate) fn try_shift(&self, token: Token) -> Option<Parser<'a>> {
1158        let mut sim = self.clone();
1159        let mut iters = 0;
1160        loop {
1161            iters += 1;
1162            if iters > 1000 {
1163                return None;
1164            }
1165            match sim.maybe_reduce(Some(token)) {
1166                Ok(None) => {
1167                    sim.shift(token);
1168                    return Some(sim);
1169                }
1170                Ok(Some((0, _, _))) => return Some(sim), // accept
1171                Ok(Some(_)) => continue,                 // reduction, loop
1172                Err(_) => return None,
1173            }
1174        }
1175    }
1176
    /// Recover from parse errors by finding minimum-cost repairs.
    ///
    /// Takes the remaining token buffer (starting from the error token).
    /// Returns a list of errors found, each with the repairs applied.
    /// An empty result means no viable recovery was found within the search
    /// budget. On success the parser state is advanced past the repaired
    /// region and parsing can continue.
    ///
    /// NOTE(review): within this function `self` only advances during the
    /// initial fast-forward; the winning simulation found by the search is
    /// not copied back here — confirm how callers resume after recovery.
    pub fn recover(&mut self, buffer: &[Token]) -> Vec<RecoveryInfo> {
        // Fast-forward past leading tokens that shift cleanly
        let mut start = 0;
        while start < buffer.len() {
            if let Some(advanced) = self.try_shift(buffer[start]) {
                *self = advanced;
                start += 1;
            } else {
                break;
            }
        }

        // Single Dijkstra search from error point through rest of buffer.
        // Uses SimState with Rc linked-list stack for O(1) cloning.
        // Priority: (cost, tokens_remaining) — at equal cost, prefer further along.
        let buf_len = buffer.len();
        let mut pq: BinaryHeap<Reverse<(usize, usize, usize)>> = BinaryHeap::new();
        // States: (sim, buf_pos, parent_info)
        let mut states: Vec<RecoveryState<'a>> = Vec::new();
        let mut visited: BTreeSet<(usize, usize, usize)> = BTreeSet::new();

        states.push((SimState::from_parser(self), start, None));
        pq.push(Reverse((0, buf_len - start, 0)));

        while let Some(Reverse((cost, _, state_idx))) = pq.pop() {
            // Hard cap on explored states bounds search time and memory.
            if states.len() > 5000 {
                break;
            }

            let sim = states[state_idx].0.clone();
            let pos = states[state_idx].1;
            let remaining = buf_len - pos;

            // Dedup on (LR state, stack depth, buffer position): the first
            // pop of a key is via a cheapest path (Dijkstra invariant).
            let key = (sim.state, sim.depth, pos);
            if !visited.insert(key) {
                continue;
            }

            // Check if we've consumed all tokens and can accept
            if pos >= buf_len {
                // try_accept mutates, so run it on a throwaway clone.
                let mut candidate = sim.clone();
                if candidate.try_accept() {
                    let edits = Self::reconstruct_edits(&states, state_idx);
                    return Self::edits_to_errors(&edits, start);
                }
            }

            // Shift current real token (cost 0)
            if pos < buf_len
                && let Some(sim2) = sim.try_shift(buffer[pos])
            {
                let idx = states.len();
                states.push((sim2, pos + 1, Some((state_idx, Repair::Shift))));
                pq.push(Reverse((cost, remaining - 1, idx)));
            }

            // Insert any terminal (cost +1)
            // Terminal 0 is EOF (see compute_follow_from_context), so start at 1.
            let num_terms = self.table.num_terminals;
            for t in 1..num_terms {
                let token = Token::new(SymbolId(t));
                if let Some(sim2) = sim.try_shift(token) {
                    let idx = states.len();
                    states.push((sim2, pos, Some((state_idx, Repair::Insert(SymbolId(t))))));
                    pq.push(Reverse((cost + 1, remaining, idx)));
                }
            }

            // Delete current token (cost +1)
            if pos < buf_len {
                let idx = states.len();
                states.push((
                    sim,
                    pos + 1,
                    Some((state_idx, Repair::Delete(buffer[pos].terminal))),
                ));
                pq.push(Reverse((cost + 1, remaining - 1, idx)));
            }
        }

        // Search exhausted without finding acceptance
        vec![]
    }
1266
1267    /// Reconstruct the edit sequence by following parent pointers.
1268    fn reconstruct_edits(states: &[RecoveryState<'a>], mut idx: usize) -> Vec<Repair> {
1269        let mut edits = Vec::new();
1270        while let Some((parent, ref edit)) = states[idx].2 {
1271            edits.push(edit.clone());
1272            idx = parent;
1273        }
1274        edits.reverse();
1275        edits
1276    }
1277
1278    /// Split a flat edit sequence into grouped RecoveryInfo entries.
1279    fn edits_to_errors(edits: &[Repair], start: usize) -> Vec<RecoveryInfo> {
1280        let mut errors = Vec::new();
1281        let mut pos = start;
1282        let mut current_repairs: Vec<Repair> = Vec::new();
1283        let mut error_pos = pos;
1284
1285        for edit in edits {
1286            match edit {
1287                Repair::Shift => {
1288                    if !current_repairs.is_empty() {
1289                        errors.push(RecoveryInfo {
1290                            position: error_pos,
1291                            repairs: core::mem::take(&mut current_repairs),
1292                        });
1293                    }
1294                    pos += 1;
1295                    error_pos = pos;
1296                }
1297                Repair::Insert(t) => {
1298                    if current_repairs.is_empty() {
1299                        error_pos = pos;
1300                    }
1301                    current_repairs.push(Repair::Insert(*t));
1302                }
1303                Repair::Delete(t) => {
1304                    if current_repairs.is_empty() {
1305                        error_pos = pos;
1306                    }
1307                    current_repairs.push(Repair::Delete(*t));
1308                    pos += 1;
1309                }
1310            }
1311        }
1312        if !current_repairs.is_empty() {
1313            errors.push(RecoveryInfo {
1314                position: error_pos,
1315                repairs: current_repairs,
1316            });
1317        }
1318        errors
1319    }
1320}
1321
/// Lightweight parser simulation state for error recovery search.
/// Uses an Rc linked-list stack so cloning is O(1).

#[derive(Clone)]
struct SimState<'a> {
    // Parse table shared with the originating `Parser`.
    table: ParseTable<'a>,
    // Current LR state.
    state: usize,
    // Precedence carried by the current state, if any.
    prec: Option<Precedence>,
    // Token index recorded with the current state (used for reduce-span
    // bookkeeping in `do_reduce`).
    token_idx: usize,
    // Persistent linked-list stack; `None` means empty.
    stack: Option<Rc<SimStackNode>>,
    // Number of entries in `stack`, maintained for O(1) depth queries.
    depth: usize,
}
1334
/// One frame of the simulation's persistent (Rc-shared) stack.
/// Fields are a snapshot of the `SimState` taken when the frame was pushed.
struct SimStackNode {
    state: usize,
    prec: Option<Precedence>,
    token_idx: usize,
    // Next-deeper frame; `None` at the bottom of the stack.
    parent: Option<Rc<SimStackNode>>,
}
1341
impl<'a> SimState<'a> {
    /// Snapshot a `Parser` into a simulation state.
    ///
    /// The parser's Vec-based stack is rebuilt as an Rc linked list
    /// (bottom entry deepest in the chain) so later clones are O(1).
    fn from_parser(parser: &Parser<'a>) -> Self {
        let mut node: Option<Rc<SimStackNode>> = None;
        for i in 0..parser.stack.len() {
            node = Some(Rc::new(SimStackNode {
                state: parser.stack[i].state,
                prec: parser.stack[i].prec,
                token_idx: parser.stack[i].token_idx,
                parent: node,
            }));
        }
        SimState {
            table: parser.table,
            state: parser.state.state,
            prec: parser.state.prec,
            token_idx: parser.state.token_idx,
            stack: node,
            depth: parser.stack.len(),
        }
    }

    /// Try to shift `token`, running any pending reductions first.
    /// Returns the advanced copy, or `None` on a parse error (or if the
    /// reduction loop exceeds its 1000-iteration safety cap).
    fn try_shift(&self, token: Token) -> Option<SimState<'a>> {
        let mut sim = self.clone();
        let mut iters = 0;
        loop {
            iters += 1;
            // Safety cap: a pathological reduction cycle must not hang search.
            if iters > 1000 {
                return None;
            }
            match sim.maybe_reduce(Some(token)) {
                Ok(true) => return Some(sim), // accept
                Ok(false) => {
                    sim.shift(token);
                    return Some(sim);
                }
                Err(true) => continue,     // reduced, keep going
                Err(false) => return None, // error
            }
        }
    }

    /// Try EOF reductions until acceptance or failure.
    fn try_accept(&mut self) -> bool {
        let mut iters = 0;
        loop {
            iters += 1;
            // Same safety cap as `try_shift`.
            if iters > 1000 {
                return false;
            }
            match self.maybe_reduce(None) {
                Ok(true) => return true,    // accept
                Ok(false) => return false,  // shift — can't shift EOF
                Err(true) => continue,      // reduced
                Err(false) => return false, // error
            }
        }
    }

    /// Check action for lookahead. Returns:
    /// - Ok(true): accept (rule 0)
    /// - Ok(false): should shift
    /// - Err(true): reduced (call again)
    /// - Err(false): parse error
    fn maybe_reduce(&mut self, lookahead: Option<Token>) -> Result<bool, bool> {
        // `None` lookahead means end of input.
        let terminal = lookahead.map(|t| t.terminal).unwrap_or(SymbolId::EOF);
        let resolution = lookahead.and_then(|t| t.resolution);

        match self.table.action(self.state, terminal) {
            ParserOp::Reduce(rule) => {
                if rule == 0 {
                    return Ok(true);
                }
                self.do_reduce(rule);
                Err(true)
            }
            ParserOp::Shift(_) => Ok(false),
            ParserOp::ShiftOrReduce { reduce_rule, .. } => {
                // Shift/reduce conflict: honor an explicit token resolution,
                // else compare precedence levels; on a tie, reduce only when
                // the stack precedence is left-associative.
                let should_reduce = match (self.prec, resolution) {
                    (_, Some(Resolution::Reduce)) => true,
                    (_, Some(Resolution::Shift)) => false,
                    (Some(sp), Some(Resolution::Prec(tp))) => {
                        if tp.level() > sp.level() {
                            false
                        } else if tp.level() < sp.level() {
                            true
                        } else {
                            matches!(sp, Precedence::Left(_))
                        }
                    }
                    // No information either way: default to shifting.
                    _ => false,
                };
                if should_reduce {
                    self.do_reduce(reduce_rule);
                    Err(true)
                } else {
                    Ok(false)
                }
            }
            ParserOp::Error => Err(false),
        }
    }

    /// Push the current frame and enter the shift target state.
    ///
    /// Panics if the table has no shift action for `token` in this state;
    /// callers must have checked via `maybe_reduce` first.
    fn shift(&mut self, token: Token) {
        let next_state = match self.table.action(self.state, token.terminal) {
            ParserOp::Shift(s) => s,
            ParserOp::ShiftOrReduce { shift_state, .. } => shift_state,
            _ => panic!("shift called when action is not shift"),
        };
        // A precedence carried by the token overrides the inherited one.
        let token_prec = match token.resolution {
            Some(Resolution::Prec(p)) => Some(p),
            _ => None,
        };
        let prec = token_prec.or(self.prec);
        self.stack = Some(Rc::new(SimStackNode {
            state: self.state,
            prec: self.prec,
            token_idx: self.token_idx,
            parent: self.stack.take(),
        }));
        self.depth += 1;
        self.state = next_state;
        self.prec = prec;
    }

    /// Apply `rule`: pop its RHS frames off the linked-list stack and enter
    /// the goto state for the rule's LHS.
    ///
    /// NOTE(review): a missing goto entry silently returns with the state
    /// unchanged — presumably unreachable with a well-formed table; confirm
    /// this mirrors the real parser's reduce path.
    fn do_reduce(&mut self, rule: usize) {
        let (lhs, len) = self.table.rule_info(rule);
        if len == 0 {
            // Empty rule: nothing is popped; the current frame is pushed and
            // the goto state entered with no precedence.
            let goto_state = match self.table.goto(self.state, lhs) {
                Some(s) => s,
                None => return,
            };
            self.stack = Some(Rc::new(SimStackNode {
                state: self.state,
                prec: self.prec,
                token_idx: self.token_idx,
                parent: self.stack.take(),
            }));
            self.depth += 1;
            self.state = goto_state;
            self.prec = None;
        } else {
            // Walk len-1 parent pointers to find anchor
            let mut anchor = self.stack.as_ref().unwrap().clone();
            for _ in 0..len - 1 {
                let parent = anchor.parent.as_ref().unwrap().clone();
                anchor = parent;
            }
            // Precedence of the reduced span: top-of-state for single-symbol
            // rules, otherwise the anchor frame's.
            let captured_prec = if len == 1 { self.prec } else { anchor.prec };
            // Start token: for multi-symbol, walk to the deepest consumed node
            let start_token = if len == 1 {
                self.token_idx
            } else {
                // The first symbol consumed is len-1 nodes down from stack top
                let mut node = self.stack.as_ref().unwrap().clone();
                for _ in 0..len - 2 {
                    node = node.parent.as_ref().unwrap().clone();
                }
                node.token_idx
            };
            let goto_state = match self.table.goto(anchor.state, lhs) {
                Some(s) => s,
                None => return,
            };
            // Stack becomes anchor (it stays)
            self.stack = Some(anchor);
            self.depth -= len - 1;
            self.state = goto_state;
            self.prec = captured_prec;
            self.token_idx = start_token;
        }
    }
}
1514
/// Information about one error recovery point.
///
/// Produced by [`Parser::recover`]; each entry groups one contiguous run of
/// insert/delete repairs at a single error location.

#[derive(Debug, Clone)]
pub struct RecoveryInfo {
    /// Token index where the error was detected.
    pub position: usize,
    /// The repairs applied to recover.
    pub repairs: Vec<Repair>,
}
1524
/// A single repair action during error recovery.
///
/// Only `Insert` and `Delete` carry an edit cost; `Shift` records that a
/// real token was consumed between edits, and is used when grouping the
/// edit sequence into [`RecoveryInfo`] entries.

#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Repair {
    /// Insert a terminal (by symbol ID).
    Insert(SymbolId),
    /// Delete a token (by symbol ID).
    Delete(SymbolId),
    /// Shift the current token (free cost — not an edit).
    Shift,
}
1536
/// A concrete parse tree built by [`CstParser`].
///
/// Nodes store rule indices, not names. Use [`CompiledTable`](crate::table::CompiledTable)
/// to resolve names for display.

#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Cst {
    /// A terminal leaf.
    Leaf {
        /// The terminal's symbol ID.
        symbol: SymbolId,
        /// Token index (from [`Parser::token_count`]).
        token_index: usize,
    },
    /// An interior node from reducing a grammar rule.
    Node {
        /// The rule index that produced this node.
        rule: usize,
        /// Child nodes, one per right-hand-side symbol, in match order.
        children: Vec<Cst>,
    },
}
1559
/// A parser that builds a [`Cst`] automatically.
///
/// Mirrors the `push`/`finish` pattern of generated parsers.
pub struct CstParser<'a> {
    // Underlying table-driven parser doing the LR work.
    parser: Parser<'a>,
    // Completed subtrees, one entry per shifted or reduced symbol.
    stack: Vec<Cst>,
}
1567
1568impl<'a> CstParser<'a> {
1569    /// Create a new tree parser with the given parse table.
1570    pub fn new(table: ParseTable<'a>) -> Self {
1571        CstParser {
1572            parser: Parser::new(table),
1573            stack: Vec::new(),
1574        }
1575    }
1576
1577    /// Push a token, performing any pending reductions.
1578    pub fn push(&mut self, token: Token) -> Result<(), ParseError> {
1579        loop {
1580            match self.parser.maybe_reduce(Some(token)) {
1581                Ok(Some((rule, len, _))) if rule > 0 => {
1582                    let children = self.stack.drain(self.stack.len() - len..).collect();
1583                    self.stack.push(Cst::Node { rule, children });
1584                }
1585                Ok(_) => break,
1586                Err(e) => {
1587                    self.stack.clear();
1588                    self.parser.restore_checkpoint();
1589                    return Err(e);
1590                }
1591            }
1592        }
1593        let token_idx = self.parser.token_count();
1594        self.stack.push(Cst::Leaf {
1595            symbol: token.terminal,
1596            token_index: token_idx,
1597        });
1598        self.parser.shift(token);
1599        Ok(())
1600    }
1601
1602    /// Finish parsing and return the parse tree.
1603    #[allow(clippy::result_large_err)]
1604    pub fn finish(mut self) -> Result<Cst, (Self, ParseError)> {
1605        loop {
1606            match self.parser.maybe_reduce(None) {
1607                Ok(Some((0, _, _))) => {
1608                    return Ok(self.stack.pop().expect("empty stack after accept"));
1609                }
1610                Ok(Some((rule, len, _))) => {
1611                    let children = self.stack.drain(self.stack.len() - len..).collect();
1612                    self.stack.push(Cst::Node { rule, children });
1613                }
1614                Ok(None) => unreachable!(),
1615                Err(e) => {
1616                    self.stack.clear();
1617                    self.parser.restore_checkpoint();
1618                    return Err((self, e));
1619                }
1620            }
1621        }
1622    }
1623
1624    /// Format a parse error into a detailed message.
1625    pub fn format_error(
1626        &self,
1627        terminal: SymbolId,
1628        ctx: &impl ErrorContext,
1629        display_names: Option<&[(&str, &str)]>,
1630        tokens: Option<&[&str]>,
1631    ) -> String {
1632        self.parser
1633            .format_error(terminal, ctx, display_names, tokens)
1634    }
1635}
1636
#[cfg(test)]
mod tests {
    use super::*;
    use crate::grammar::SymbolId;
    use crate::lr::to_grammar_internal;
    use crate::meta::parse_grammar;
    use crate::table::CompiledTable;

    /// Every `OpEntry` variant must round-trip through `decode`.
    #[test]
    fn test_action_entry_encoding() {
        let shift = OpEntry::shift(42);
        assert_eq!(shift.decode(), ParserOp::Shift(42));

        let reduce = OpEntry::reduce(7);
        assert_eq!(reduce.decode(), ParserOp::Reduce(7));

        // Accept is Reduce(0)
        let accept = OpEntry::reduce(0);
        assert_eq!(accept.decode(), ParserOp::Reduce(0));

        let error = OpEntry(0);
        assert_eq!(error.decode(), ParserOp::Error);

        let sor = OpEntry::shift_or_reduce(10, 5);
        match sor.decode() {
            ParserOp::ShiftOrReduce {
                shift_state,
                reduce_rule,
            } => {
                assert_eq!(shift_state, 10);
                assert_eq!(reduce_rule, 5);
            }
            other => panic!("Expected ShiftOrReduce, got {:?}", other),
        }
    }

    #[test]
    fn test_parse_single_token() {
        let grammar = to_grammar_internal(
            &parse_grammar(
                r#"
            start s; terminals { a } s = a => a;
        "#,
            )
            .unwrap(),
        )
        .unwrap();

        let compiled = CompiledTable::build_from_internal(&grammar);
        let mut parser = Parser::new(compiled.table());

        let a_id = compiled.symbol_id("a").unwrap();
        let token = Token::new(a_id);

        // Should not reduce before shifting
        assert!(matches!(parser.maybe_reduce(Some(token)), Ok(None)));

        // Shift the token
        parser.shift(token);

        // Now reduce with EOF lookahead
        let result = parser.maybe_reduce(None);
        assert!(matches!(result, Ok(Some((1, 1, 0))))); // rule 1, len 1, start_idx 0

        // Should be accepted now (rule 0)
        let result = parser.maybe_reduce(None);
        assert!(matches!(result, Ok(Some((0, 0, 0)))));
    }

    #[test]
    fn test_parse_error() {
        let grammar = to_grammar_internal(
            &parse_grammar(
                r#"
            start s; terminals { a } s = a => a;
        "#,
            )
            .unwrap(),
        )
        .unwrap();

        let compiled = CompiledTable::build_from_internal(&grammar);
        let mut parser = Parser::new(compiled.table());

        // A symbol id the grammar never declared must be rejected.
        let wrong_id = SymbolId(99);
        let token = Token::new(wrong_id);

        let result = parser.maybe_reduce(Some(token));
        assert!(result.is_err());
    }

    #[test]
    fn test_format_error() {
        let grammar = to_grammar_internal(
            &parse_grammar(
                r#"
            start s; terminals { a, b } s = a => a;
        "#,
            )
            .unwrap(),
        )
        .unwrap();

        let compiled = CompiledTable::build_from_internal(&grammar);
        let mut parser = Parser::new(compiled.table());

        // Try to parse 'b' when only 'a' is expected
        let b_id = compiled.symbol_id("b").unwrap();
        let token = Token::new(b_id);

        let ParseError::Syntax { terminal } = parser.maybe_reduce(Some(token)).unwrap_err();
        let msg = parser.format_error(terminal, &compiled, None, None);

        assert!(msg.contains("unexpected"), "msg: {}", msg);
        assert!(msg.contains("'b'"), "msg: {}", msg);
        assert!(msg.contains("s"), "msg: {}", msg);
    }
}