Skip to main content

lexigram_core/parser/
mod.rs

1// Copyright (c) 2025 Redglyph (@gmail.com). All Rights Reserved.
2
3use std::fmt::{Display, Formatter};
4use crate::fixed_sym_table::{FixedSymTable, SymInfoTable};
5use crate::{AltId, TokenId, VarId};
6use crate::lexer::{Pos, PosSpan};
7use crate::log::Logger;
8use crate::alt::Alternative;
9
10pub(crate) mod tests;
11
12// ---------------------------------------------------------------------------------------------
13
/// Symbol of a grammar: terminal, nonterminal, empty symbol, or end of stream.
///
/// The default value is [Symbol::Empty].
#[derive(Clone, Copy, Default, PartialEq, PartialOrd, Eq, Ord, Debug, Hash)]
pub enum Symbol {
    /// Terminal, identified by its token ID (displayed as `:<id>` without symbol table)
    T(TokenId),
    /// Nonterminal, identified by its variable ID (displayed as `<id>` without symbol table)
    NT(VarId),
    /// Empty symbol (displayed as `ε`). The parser also uses it to flag an item of the stream
    /// that the lexer couldn't recognize.
    #[default] Empty,
    /// End of stream (displayed as `$`)
    End
}
21
22impl Symbol {
23    pub fn is_end(&self) -> bool {
24        matches!(self, Symbol::End)
25    }
26
27    pub fn is_empty(&self) -> bool {
28        matches!(self, Symbol::Empty)
29    }
30
31    pub fn is_t(&self) -> bool {
32        matches!(self, Symbol::T(_))
33    }
34
35    pub fn is_nt(&self) -> bool {
36        matches!(self, Symbol::NT(_))
37    }
38
39    pub fn is_t_or_nt(&self) -> bool {
40        matches!(self, Symbol::T(_) | Symbol::NT(_))
41    }
42
43    pub fn to_str<T: SymInfoTable>(&self, symbol_table: Option<&T>) -> String {
44        symbol_table.map(|t| t.get_str(self)).unwrap_or_else(|| self.to_string())
45    }
46
47    /// Converts the symbol to string, using the symbol table if available, and
48    /// surrounding it with quotes if it's a string literal.
49    pub fn to_str_quote<T: SymInfoTable>(&self, symbol_table: Option<&T>) -> String {
50        symbol_table.map(|t| t.get_name_quote(self)).unwrap_or_else(|| self.to_string())
51    }
52
53    pub fn to_str_name<T: SymInfoTable>(&self, symbol_table: Option<&T>) -> String {
54        symbol_table.map(|t| t.get_name(self)).unwrap_or_else(|| self.to_string())
55    }
56
57    /// Converts the symbol to string, using the symbol table if available.
58    pub fn to_str_ext<T: SymInfoTable>(&self, symbol_table: Option<&T>, ext: &String) -> String {
59        let mut result = self.to_str(symbol_table);
60        if let Some(t) = symbol_table {
61            if t.is_symbol_t_data(self) {
62                result.push_str(&format!("({ext})"));
63            }
64        }
65        result
66    }
67
68    /// Converts to symbols used in `sym!` and other related macros of the `lexigram` crate.
69    pub fn to_macro_item(&self) -> String {
70        match self {
71            Symbol::Empty => "e".to_string(),
72            Symbol::T(x) => format!("t {x}"),
73            Symbol::NT(x) => format!("nt {x}"),
74            Symbol::End => "end".to_string(),
75        }
76    }
77}
78
79impl Display for Symbol {
80    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
81        match self {
82            Symbol::Empty => write!(f, "ε"),
83            Symbol::T(id) => write!(f, ":{id}"),
84            Symbol::NT(id) => write!(f, "{id}"),
85            Symbol::End => write!(f, "$"),
86        }
87    }
88}
89
/// Item of the parser stack. The opcodes of an alternative are pushed onto the stack when the
/// parser enters that alternative, then popped and processed one at a time.
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum OpCode {
    /// Empty symbol
    Empty,
    /// Terminal, compared to the next terminal of the stream
    T(TokenId),
    /// Nonterminal
    NT(VarId),
    /// Loop to same nonterminal
    Loop(VarId),
    /// Exit nonterminal. Note that the parser uses the value as an index into its
    /// alternative-to-nonterminal table, so it identifies the alternative that is exited.
    Exit(VarId),
    /// Terminal hook callback
    Hook,
    /// End of stream
    End,
}
100
101
102impl Display for OpCode {
103    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
104        match self {
105            OpCode::Empty => write!(f, "ε"),
106            OpCode::T(t) => write!(f, ":{t}"),
107            OpCode::NT(v) => write!(f, "►{v}"),
108            OpCode::Loop(v) => write!(f, "●{v}"),
109            OpCode::Exit(v) => write!(f, "◄{v}"),
110            OpCode::Hook => write!(f, "▲"),
111            OpCode::End => write!(f, "$"),
112        }
113    }
114}
115
116impl OpCode {
117    pub fn is_loop(&self) -> bool {
118        matches!(self, OpCode::Loop(_))
119    }
120
121    pub fn is_empty(&self) -> bool {
122        matches!(self, OpCode::Empty)
123    }
124
125    pub fn has_span(&self) -> bool {
126        matches!(self, OpCode::T(_) | OpCode::NT(_))
127    }
128
129    pub fn matches(&self, s: Symbol) -> bool {
130        match self {
131            OpCode::Empty => s == Symbol::Empty,
132            OpCode::T(t) => s == Symbol::T(*t),
133            OpCode::NT(v) => s == Symbol::NT(*v),
134            OpCode::End => s == Symbol::End,
135            OpCode::Loop(_)
136            | OpCode::Exit(_)
137            | OpCode::Hook => false,
138        }
139    }
140
141    pub fn to_str<T: SymInfoTable>(&self, symbol_table: Option<&T>) -> String {
142        if let Some(t) = symbol_table {
143            match self {
144                OpCode::Empty => "ε".to_string(),
145                OpCode::T(v) => format!("{}{}", t.get_t_str(*v), if t.is_token_data(*v) { "!" } else { "" }),
146                OpCode::NT(v) => format!("►{}", t.get_nt_name(*v)),
147                OpCode::Loop(v) => format!("●{}", t.get_nt_name(*v)),
148                OpCode::Exit(f) => format!("◄{f}"),
149                OpCode::Hook => "▲".to_string(),
150                OpCode::End => "$".to_string(),
151            }
152        } else {
153            self.to_string()
154        }
155    }
156
157    pub fn to_str_name<T: SymInfoTable>(&self, symbol_table: Option<&T>) -> String {
158        if let Some(tbl) = symbol_table {
159            match self {
160                OpCode::T(v) => tbl.get_t_str(*v),
161                _ => self.to_str(symbol_table),
162            }
163        } else {
164            self.to_string()
165        }
166    }
167
168    pub fn to_str_quote<T: SymInfoTable>(&self, symbol_table: Option<&T>) -> String {
169        if let Some(t) = symbol_table {
170            match self {
171                OpCode::T(v) => format!("{}{}", Symbol::T(*v).to_str_quote(symbol_table), if t.is_token_data(*v) { "!" } else { "" }),
172                _ => self.to_str(symbol_table)
173            }
174        } else {
175            self.to_string()
176        }
177    }
178
179    pub fn to_str_ext<T: SymInfoTable>(&self, symbol_table: Option<&T>, ext: &String) -> String {
180        let mut result = self.to_str(symbol_table);
181        if let Some(t) = symbol_table {
182            if let OpCode::T(tok) = self {
183                if t.is_symbol_t_data(&Symbol::T(*tok)) {
184                    result.push_str(&format!("({ext})"));
185                }
186            }
187        }
188        result
189    }
190}
191
192impl From<Symbol> for OpCode {
193    fn from(value: Symbol) -> Self {
194        match value {
195            Symbol::Empty => OpCode::Empty,
196            Symbol::T(t) => OpCode::T(t),
197            Symbol::NT(v) => OpCode::NT(v),
198            Symbol::End => OpCode::End,
199        }
200    }
201}
202
#[cfg(feature = "test_utils")]
impl OpCode {
    /// Converts the opcode to its macro item notation: a keyword (`e`, `t`, `nt`, `loop`, `exit`,
    /// `hook`, `end`), followed by the ID when the opcode has one.
    pub fn to_macro_item(&self) -> String {
        match *self {
            // keyword only
            Self::Empty => String::from("e"),
            Self::Hook => String::from("hook"),
            Self::End => String::from("end"),
            // keyword + ID
            Self::T(id) => format!("t {id}"),
            Self::NT(id) => format!("nt {id}"),
            Self::Loop(id) => format!("loop {id}"),
            Self::Exit(id) => format!("exit {id}"),
        }
    }
}
217
218// ---------------------------------------------------------------------------------------------
219
/// Codes returned by the [check_abort_request(...)](ListenerWrapper::check_abort_request) method of
/// the listener (via the wrapper pass-through). They tell the parser whether it must continue or
/// stop parsing.
///
/// The same codes are carried by [Call::End] to tell the wrapper how the parsing has ended.
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum Terminate {
    /// Normal behaviour: continues parsing the text
    None,
    /// Irrecoverable error: stops parsing, calls the listener abort method, and returns an error
    Abort,
    /// Stops parsing, calls the listener exit method, and returns an Ok
    Conclude,
}
231
/// Actions passed to the wrapper with the method [ListenerWrapper::switch]. The wrapper translates
/// the action according to the current nonterminal and alternative; for example, by calling the
/// appropriate listener callback.
#[derive(PartialEq, Debug)]
pub enum Call {
    /// Enters a new nonterminal rule. The alternative is already known, but the values of the symbols
    /// in that alternative haven't been scanned yet.
    ///
    /// This can be used to initialize the listener's variables when a particular rule is about to be
    /// parsed (the listener methods associated with this action are normally optional since no
    /// information is returned to the wrapper).
    ///
    /// The wrapper also uses this call to initialize stack items like accumulators used in rule loops
    /// like `a -> b*`.
    Enter,
    /// Re-enters a loop nonterminal. This is currently not used in the wrapper.
    Loop,
    /// Exits an alternative, once all the symbols in it have been parsed: nonterminals and terminals.
    ///
    /// This is typically used to call an exit method of the listener and evaluate its value when it
    /// has one.
    Exit,
    /// This action is used in two situations:
    /// * when the parsing of the top rule has completed normally. In that case, the wrapper
    ///   calls the [exit(...)] method of the listener (done in the generated code).
    /// * when the parsing is [aborted](Terminate::Abort) or [concluded](Terminate::Conclude) in
    ///   reaction to a [check_abort_request(...)](ListenerWrapper::check_abort_request) call. In
    ///   that case, the wrapper calls the [abort(...)] method of the listener (done in the generated
    ///   code).
    ///
    /// The [Terminate] value it contains tells the wrapper which of those eventualities has
    /// occurred.
    // NOTE(review): the documentation of `Terminate::Conclude` says that the listener *exit* method
    // is called when the parsing is concluded, while the text above says *abort* for both cases.
    // The generated code isn't visible from here; confirm which one is right.
    End(Terminate)
}
266
/// Interface between the parser and the listener. The parser only talks to the wrapper, which
/// translates the parser events into listener calls.
///
/// All the methods but [get_log_mut(...)](ListenerWrapper::get_log_mut) have a default
/// implementation that does nothing, returns a neutral value, or returns the token unchanged.
pub trait ListenerWrapper {
    /// Calls the listener to execute Enter, Loop, Exit, and End actions.
    ///
    /// * `call`: the action to execute
    /// * `nt`: the current nonterminal
    /// * `alt_id`: the current alternative
    /// * `t_data`: the strings accumulated by the parser since the previous call (presumably the
    ///   text of the terminals that hold data; the code that collects them isn't in this part of
    ///   the file)
    ///
    /// Note that the parser stops sending the Enter, Loop, and Exit actions once it has had to
    /// recover from a syntax error.
    #[allow(unused_variables)]
    fn switch(&mut self, call: Call, nt: VarId, alt_id: AltId, t_data: Option<Vec<String>>) {}

    /// Checks if the listener requests an abort (wrapper pass-through). This method is called at the end of
    /// each parser iteration. If an error is too difficult to recover from, the listener can set a flag that
    /// tells to return a [Terminate::Abort] on the next call, and implement this method to return
    /// the appropriate status.
    ///
    /// In that case, the parser
    /// * calls [abort(...)](ListenerWrapper::abort)
    /// * calls [switch([Call::End]([Terminate::Abort]))](ListenerWrapper::switch) (if there was no syntax error)
    /// * returns [ParserError::AbortRequest].
    ///
    /// The default implementation never requests anything and returns [Terminate::None].
    fn check_abort_request(&self) -> Terminate { Terminate::None }

    /// Aborts the parsing. The parser calls this method before returning an error like
    /// [ParserError::SyntaxError], [ParserError::TooManyErrors], or [ParserError::Irrecoverable].
    fn abort(&mut self) {}

    /// Gets access to the listener's log to report possible errors and information about the parsing.
    fn get_log_mut(&mut self) -> &mut impl Logger;

    /// Pushes a location span onto the (optional) span stack
    #[allow(unused_variables)]
    fn push_span(&mut self, span: PosSpan) {}

    /// Checks that the stack is empty (the parser only checks that the stack is empty after successfully parsing a text)
    fn is_stack_empty(&self) -> bool { true }

    /// Checks that the stack_t is empty (the parser only checks that the stack is empty after successfully parsing a text)
    fn is_stack_t_empty(&self) -> bool { true }

    /// Checks that the stack_span is empty (the parser only checks that the stack is empty after successfully parsing a text)
    fn is_stack_span_empty(&self) -> bool { true }

    /// Allows to dynamically translate a token in the listener (wrapper pass-through). The parser
    /// calls this method when it meets a [hook opcode](OpCode::Hook) while the next item of the
    /// stream is a terminal, at most once for each token read from the stream. The returned token
    /// replaces the original one.
    #[allow(unused_variables)]
    fn hook(&mut self, token: TokenId, text: &str, span: &PosSpan) -> TokenId {
        token
    }

    /// Allows to intercept any token in the listener (wrapper pass-through). The parser calls this
    /// method for each token it reads from the stream, and uses the returned token instead.
    #[allow(unused_variables)]
    fn intercept_token(&mut self, token: TokenId, text: &str, span: &PosSpan) -> TokenId {
        token
    }
}
314
315// ---------------------------------------------------------------------------------------------
316
/// Item of the stream given to [parse_stream(...)](Parser::parse_stream): the token ID, the
/// text of the token, and its location span in the source.
pub type ParserToken = (TokenId, String, PosSpan);
318
/// Code of the error that occurred during the parsing, returned by the
/// [parse_stream(...)](Parser::parse_stream) method of the parser.
#[derive(PartialEq, Debug)]
pub enum ParserError {
    /// A syntax error was met. Either
    /// * The next terminal of the parsed text doesn't match the expected one in the current rule
    ///   alternative; for example, a rule `assign -> "let" Id "=" expr ";";` has just successfully
    ///   scanned the terminal `"let"`, but the next one isn't `Id`.
    /// * The next symbol doesn't correspond to any correct option for the next nonterminal (in
    ///   other words, there is no entry in the parsing table for that combination). For example,
    ///   in the same rule as above, the terminal `"="` has just been scanned successfully, but `expr`
    ///   doesn't begin with the next one.
    ///
    /// This error is returned only when the parser doesn't try to recover from syntax errors; the
    /// recovery is set with the [set_try_recover(...)](Parser::set_try_recover) method and is
    /// enabled by default.
    ///
    /// See also [ParserError::TooManyErrors].
    SyntaxError,
    /// Too many syntax errors were met, either
    /// * during the parsing. The limit is set by the constant [Parser::MAX_NBR_RECOVERS].
    /// * by the lexer. The limit is set by the constant [Parser::MAX_NBR_LEXER_ERRORS].
    ///
    /// The limit on syntax errors only applies when the parser tries to recover from them; the
    /// recovery is set with the [set_try_recover(...)](Parser::set_try_recover) method and is
    /// enabled by default. The limit on lexical errors is checked whether the recovery is enabled
    /// or not.
    ///
    /// See also [ParserError::SyntaxError].
    TooManyErrors,
    /// The parser has reached an irrecoverable error, after trying to recover from a syntax error and
    /// encountering the end of the text.
    Irrecoverable,
    /// The parser has reached the end of the top rule, but there are still terminals coming from
    /// the lexer.
    ///
    /// Note that if the text is expected to contain something else after the part that must be parsed,
    /// it is possible to tell the parser to conclude the parsing without looking any further. This
    /// can be done in the listener with the [check_abort_request(...)] performed regularly by the
    /// parser. See the [examples/terminate] parser to see how it can be used.
    ExtraSymbol,
    /// The parser has encountered the end of the text, but the top rule hasn't been fully parsed.
    UnexpectedEOS,
    /// This is an internal error that isn't supposed to happen.
    UnexpectedError,
    /// The text has been fully parsed, but syntax errors were encountered by the parser (and could
    /// be recovered from).
    ///
    /// See also [ParserError::SyntaxError].
    EncounteredErrors,
    /// An [Abort](Terminate::Abort) was returned by the [check_abort_request(...)] method of the
    /// listener.
    AbortRequest,
}
372
373impl Display for ParserError {
374    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
375        write!(f, "{}", match self {
376            ParserError::SyntaxError => "syntax error",
377            ParserError::TooManyErrors => "too many errors while trying to recover",
378            ParserError::Irrecoverable => "irrecoverable syntax error",
379            ParserError::ExtraSymbol => "extra symbol after end of parsing",
380            ParserError::UnexpectedEOS => "unexpected end of stream",
381            ParserError::UnexpectedError => "unexpected error",
382            ParserError::EncounteredErrors => "encountered errors",
383            ParserError::AbortRequest => "abort request",
384        })
385    }
386}
387
/// Parser object. The [new(...)](Parser::new) method creates a new instance.
///
/// The parser is driven by a table: for the nonterminal on top of its stack and the next terminal
/// of the stream, the table gives the alternative to apply, whose opcodes are then pushed onto
/// the stack.
pub struct Parser<'a> {
    /// Number of nonterminals; the start nonterminal must be lower than this value
    num_nt: usize,
    /// Number of columns in each row of `table`; the last column (`num_t - 1`) is used when the
    /// next item of the stream isn't a terminal, like the end of stream
    num_t: usize,
    /// Nonterminal of each alternative, indexed by alternative ID
    alt_var: &'a [VarId],
    /// Alternatives, indexed by alternative ID (only used in the debug messages in the visible code)
    alts: Vec<Alternative>,
    /// Opcodes pushed onto the stack when an alternative is entered, indexed by alternative ID
    opcodes: Vec<Vec<OpCode>>,
    /// Initial content of the stack
    init_opcodes: Vec<OpCode>,
    /// Parsing table, flattened by rows: the item at `nonterminal * num_t + terminal` is the
    /// alternative ID to apply. The values `alt_var.len()` and `alt_var.len() + 1` are error codes
    /// which, in recovery mode, respectively tell to skip the token and to pop the stack.
    table: &'a [AltId],
    /// Symbol table used to show the names of the symbols in the messages
    symbol_table: FixedSymTable,
    /// Top nonterminal
    start: VarId,
    try_recover: bool,          // tries to recover from syntactical errors
}
401
402impl<'a> Parser<'a> {
    /// Maximum number of error recoveries attempted when meeting a syntax error. Once that
    /// number of recoveries has been attempted, the next syntax error makes the parser give up
    /// and return [ParserError::TooManyErrors].
    pub const MAX_NBR_RECOVERS: u32 = 5;
    /// Number of lexical errors (items of the stream that the lexer couldn't recognize) at which
    /// the parser gives up and returns [ParserError::TooManyErrors].
    pub const MAX_NBR_LEXER_ERRORS: u32 = 3;
406
407    pub fn new(
408        num_nt: usize,
409        num_t: usize,
410        alt_var: &'a [VarId],
411        alts: Vec<Alternative>,
412        opcodes: Vec<Vec<OpCode>>,
413        init_opcodes: Vec<OpCode>,
414        table: &'a [AltId],
415        symbol_table: FixedSymTable,
416        start: VarId,
417    ) -> Self {
418        Parser { num_nt, num_t, alt_var, alts, opcodes, init_opcodes, table, symbol_table, start, try_recover: true }
419    }
420
421    /// Gets a reference to the symbol table, if one is attached.
422    pub fn get_symbol_table(&self) -> Option<&FixedSymTable> {
423        Some(&self.symbol_table)
424    }
425
    /// Sets the top nonterminal. The parser ends the parsing once the corresponding rule has been entirely parsed.
    ///
    /// # Panics
    ///
    /// Panics if `start` isn't lower than the number of nonterminals given to the parser.
    pub fn set_start(&mut self, start: VarId) {
        assert!(self.num_nt > start as usize);
        self.start = start;
    }
431
    /// Enables or disables the recovery from syntactic or lexical errors. The recovery is enabled
    /// when the parser is created.
    ///
    /// See also [ParserError::TooManyErrors] and [ParserError::SyntaxError].
    pub fn set_try_recover(&mut self, try_recover: bool) {
        self.try_recover = try_recover;
    }
438
439    /// Determines with a quick simulation if `sym` is accepted by the grammar with the current
440    /// `stack` and current stack symbol `stack_sym`.
441    fn simulate(&self, stream_sym: Symbol, mut stack: Vec<OpCode>, mut stack_sym: OpCode) -> bool {
442        const VERBOSE: bool = false;
443        let error_skip_alt_id = self.alt_var.len() as AltId;
444        let end_var_id = (self.num_t - 1) as VarId;
445        if VERBOSE { print!("  next symbol could be: {}?", stream_sym.to_str(self.get_symbol_table())); }
446
447        let ok = loop {
448            match (stack_sym, stream_sym) {
449                (OpCode::NT(var), _) | (OpCode::Loop(var), _) => {
450                    let sr = if let Symbol::T(sr) = stream_sym { sr } else { end_var_id };
451                    let alt_id = self.table[var as usize * self.num_t + sr as usize];
452                    if alt_id >= error_skip_alt_id {
453                        break false;
454                    }
455                    stack.extend(self.opcodes[alt_id as usize].clone());
456                    stack_sym = stack.pop().unwrap();
457                }
458                (OpCode::Exit(_), _) => {
459                    stack_sym = stack.pop().unwrap();
460                }
461                (OpCode::T(sk), Symbol::T(sr)) => {
462                    break sk == sr;
463                }
464                (OpCode::End, Symbol::End) => {
465                    break true;
466                }
467                (_, _) => {
468                    break false;
469                }
470            }
471        };
472        if VERBOSE { println!(" {}", if ok { "yes" } else { "no" }); }
473        ok
474    }
475
476    /// Parses the entire `stream`, calling the (listener) [wrapper](ListenerWrapper) with the
477    /// [actions](Call) that correspond to the parser events.
478    ///
479    /// Returns `Ok(())` if the whole stream could be successfully parsed, or an
480    /// [error](ParserError) if it couldn't.
481    ///
482    /// All errors are reported in the wrapper's log. Usually, the wrapper simply transmits the
483    /// reports to the user listener's log (done in the generated code).
484    pub fn parse_stream<I, L>(&mut self, wrapper: &mut L, mut stream: I) -> Result<(), ParserError>
485        where I: Iterator<Item=ParserToken>,
486              L: ListenerWrapper,
487    {
488        /// Outputs debug messages on stdout.
489        const VERBOSE: bool = false;
490
491        /// Delays the capture of the next token and the call to `intercept_token()` if it's possible.
492        /// That allows to call as many `exit_*()` methods as possible in the listener, and so to
493        /// update any information that may impact the translation of the next token.
494        const DELAY_STREAM_INTERCEPTION: bool = cfg!(feature = "delay_stream_interception");
495
496        let sym_table: Option<&FixedSymTable> = Some(&self.symbol_table);
497        let mut stack = self.init_opcodes.clone();
498        let mut stack_t = Vec::<String>::new();
499        let error_skip_alt_id = self.alt_var.len() as AltId;
500        let error_pop_alt_id = error_skip_alt_id + 1;
501        if VERBOSE { println!("skip = {error_skip_alt_id}, pop = {error_pop_alt_id}"); }
502        let mut recover_mode = false;
503        let mut nbr_recovers = 0;
504        let mut nbr_lexer_errors = 0;
505        let end_var_id = (self.num_t - 1) as VarId;
506        let mut stack_sym = stack.pop().unwrap();
507        let mut stream_n = 0;
508        let mut stream_pos = None;
509        let mut stream_span = PosSpan::empty();
510        let mut stream_sym = Symbol::default(); // must set fake value to comply with borrow checker
511        let mut stream_str = String::default(); // must set fake value to comply with borrow checker
512        let mut advance_stream = true;
513        let mut hook_active = false;
514        loop {
515            if advance_stream &&
516                (!DELAY_STREAM_INTERCEPTION                     // if optimization == false, only checks advance_stream
517                    || (!matches!(stack_sym, OpCode::Exit(_))   // exit => needn't advance, unless...
518                    || stream_sym == Symbol::Empty))            // Symbol::Empty => must advance no matter what
519            {
520                stream_n += 1;
521                (stream_sym, stream_str) = stream.next().map(|(t, s, span)| {
522                    // reads the next token and possibly transforms it in intercept_token() if it's used
523                    // (if intercept_token() isn't used, it's optimized away)
524                    let new_t = wrapper.intercept_token(t, &s, &span);
525                    stream_pos = Some(span.first_forced());
526                    stream_span = span;
527                    (Symbol::T(new_t), s)
528                }).unwrap_or_else(|| {
529                    // checks if there's an error code after the end
530                    if let Some((_t, s, _span)) = stream.next() {
531                        (Symbol::Empty, s)
532                    } else {
533                        (Symbol::End, String::new())
534                    }
535                });
536                advance_stream = false;
537                hook_active = true;
538            }
539            if VERBOSE {
540                println!("{:-<40}", "");
541                println!("input ({stream_n}{}): {}   stack_t: [{}]   stack: [{}]   current: {}",
542                         if let Some(Pos(line, col)) = stream_pos { format!(", line {line}, col {col}") } else { String::new() },
543                         stream_sym.to_str_ext(sym_table, &stream_str),
544                         stack_t.join(", "),
545                         stack.iter().map(|s| s.to_str(sym_table)).collect::<Vec<_>>().join(" "),
546                         stack_sym.to_str_name(sym_table));
547            }
548            match (stack_sym, stream_sym) {
549                (_, Symbol::Empty) => {
550                    // lexer couldn't recognize the next symbol
551                    if VERBOSE { println!("lexer error: {stream_str}"); }
552                    wrapper.get_log_mut().add_error(format!("lexical error: {stream_str}"));
553                    nbr_lexer_errors += 1;
554                    if nbr_lexer_errors >= Self::MAX_NBR_LEXER_ERRORS {
555                        wrapper.get_log_mut().add_note(format!("too many lexical errors ({nbr_lexer_errors}), giving up"));
556                        wrapper.abort();
557                        return Err(ParserError::TooManyErrors);
558                    }
559                    advance_stream = true;
560                }
561                (OpCode::Hook, Symbol::T(t)) => {
562                    if hook_active {
563                        let new_t = wrapper.hook(t, stream_str.as_str(), &stream_span);
564                        stream_sym = Symbol::T(new_t);
565                        hook_active = false;
566                    }
567                    stack_sym = stack.pop().unwrap();
568                }
569                (OpCode::Hook, _) => {
570                    // hooks may happen on other alternative symbols, in which case they're irrelevant
571                    stack_sym = stack.pop().unwrap();
572                }
573                (OpCode::NT(var), _) | (OpCode::Loop(var), _) => {
574                    let sr = if let Symbol::T(sr) = stream_sym { sr } else { end_var_id };
575                    let alt_id = self.table[var as usize * self.num_t + sr as usize];
576                    if VERBOSE {
577                        println!("- table[{var}, {sr}] = {alt_id}: {} -> {}",
578                                 Symbol::NT(var).to_str(self.get_symbol_table()),
579                                 if alt_id >= error_skip_alt_id {
580                                     "ERROR".to_string()
581                                 } else if let Some(a) = self.alts.get(alt_id as usize) {
582                                     a.to_str(sym_table)
583                                 } else {
584                                     "(alternative)".to_string()
585                                 });
586                    }
587                    if !recover_mode && alt_id >= error_skip_alt_id {
588                        let expected = (0..self.num_t as VarId).filter(|t| self.table[var as usize * self.num_t + *t as usize] < error_skip_alt_id)
589                            .filter(|t| self.simulate(Symbol::T(*t), stack.clone(), stack_sym))
590                            .map(|t| format!("'{}'", if t < end_var_id { Symbol::T(t).to_str(sym_table) } else { "<EOF>".to_string() }))
591                            .collect::<Vec<_>>().join(", ");
592                        let stream_sym_txt = if stream_sym.is_end() { "end of stream".to_string() } else { format!("input '{}'", stream_sym.to_str(sym_table)) };
593                        let msg = format!("syntax error: found {stream_sym_txt} instead of {expected} while parsing '{}'{}",
594                                          stack_sym.to_str(sym_table),
595                                          if let Some(Pos(line, col)) = stream_pos { format!(", line {line}, col {col}") } else { String::new() });
596                        if self.try_recover {
597                            wrapper.get_log_mut().add_error(msg);
598                            if nbr_recovers >= Self::MAX_NBR_RECOVERS {
599                                wrapper.get_log_mut().add_note(format!("too many errors ({nbr_recovers}), giving up"));
600                                wrapper.abort();
601                                return Err(ParserError::TooManyErrors);
602                            }
603                            nbr_recovers += 1;
604                            recover_mode = true;
605                        } else {
606                            wrapper.get_log_mut().add_error(msg);
607                            wrapper.abort();
608                            return Err(ParserError::SyntaxError);
609                        }
610                    }
611                    if recover_mode {
612                        if VERBOSE { println!("!NT {} <-> {}, alt_id = {alt_id}", stack_sym.to_str(self.get_symbol_table()), stream_sym.to_str(self.get_symbol_table())); }
613                        if alt_id == error_skip_alt_id {
614                            if stream_sym == Symbol::End {
615                                let msg = "irrecoverable error, reached end of stream".to_string();
616                                if VERBOSE { println!("(recovering) {msg}"); }
617                                wrapper.get_log_mut().add_note(msg);
618                                wrapper.abort();
619                                return Err(ParserError::Irrecoverable);
620                            }
621                            if VERBOSE { println!("(recovering) skipping token {}", stream_sym.to_str(self.get_symbol_table())); }
622                            advance_stream = true;
623                        } else if alt_id == error_pop_alt_id {
624                            if VERBOSE { println!("(recovering) popping {}", stack_sym.to_str(self.get_symbol_table())); }
625                            stack_sym = stack.pop().unwrap();
626                        } else if alt_id < error_skip_alt_id {
627                            recover_mode = false;
628                            let pos_str = if let Some(Pos(line, col)) = stream_pos { format!(", line {line}, col {col}") } else { String::new() };
629                            wrapper.get_log_mut().add_note(format!("resynchronized on '{}'{pos_str}",
630                                                                   stream_sym.to_str(self.get_symbol_table())));
631                            if VERBOSE { println!("(recovering) resynchronized{pos_str}"); }
632                        } else {
633                            panic!("illegal alt_id {alt_id}")
634                        }
635                    }
636                    if !recover_mode {
637                        let call = if stack_sym.is_loop() { Call::Loop } else { Call::Enter };
638                        let t_data = std::mem::take(&mut stack_t);
639                        if VERBOSE {
640                            let f_str = if let Some(f) = &self.alts.get(alt_id as usize) {
641                                f.to_str(sym_table)
642                            } else {
643                                "(alternative)".to_string()
644                            };
645                            println!(
646                                "- to stack: [{}]",
647                                self.opcodes[alt_id as usize].iter().filter(|s| !s.is_empty()).map(|s| s.to_str(sym_table))
648                                    .collect::<Vec<_>>().join(" "));
649                            println!(
650                                "- {} {} -> {f_str} ({}): [{}]",
651                                if stack_sym.is_loop() { "LOOP" } else { "ENTER" },
652                                Symbol::NT(self.alt_var[alt_id as usize]).to_str(sym_table), t_data.len(), t_data.join(" "));
653                        }
654                        if nbr_recovers == 0 {
655                            wrapper.switch(call, var, alt_id, Some(t_data));
656                        }
657                        stack.extend(self.opcodes[alt_id as usize].clone());
658                        stack_sym = stack.pop().unwrap();
659                    }
660                }
661                (OpCode::Exit(alt_id), _) => {
662                    let var = self.alt_var[alt_id as usize];
663                    let t_data = std::mem::take(&mut stack_t);
664                    if VERBOSE {
665                        println!(
666                            "- EXIT {} syn ({}): [{}]",
667                            Symbol::NT(var).to_str(sym_table), t_data.len(), t_data.join(" "));
668                    }
669                    if nbr_recovers == 0 {
670                        wrapper.switch(Call::Exit, var, alt_id, Some(t_data));
671                    }
672                    stack_sym = stack.pop().unwrap();
673                }
674                (OpCode::T(sk), Symbol::T(sr)) => {
675                    if !recover_mode && sk != sr {
676                        let msg = format!(
677                            "syntax error: found input '{}' instead of '{}'{}",
678                            stream_sym.to_str(sym_table),
679                            Symbol::T(sk).to_str(sym_table),
680                            if let Some(Pos(line, col)) = stream_pos { format!(", line {line}, col {col}") } else { String::new() });
681                        if self.try_recover {
682                            wrapper.get_log_mut().add_error(msg);
683                            if nbr_recovers >= Self::MAX_NBR_RECOVERS {
684                                wrapper.get_log_mut().add_note(format!("too many errors ({nbr_recovers}), giving up"));
685                                wrapper.abort();
686                                return Err(ParserError::TooManyErrors);
687                            }
688                            nbr_recovers += 1;
689                            recover_mode = true;
690                        } else {
691                            wrapper.get_log_mut().add_error(msg);
692                            wrapper.abort();
693                            return Err(ParserError::SyntaxError);
694                        }
695                    }
696                    if recover_mode {
697                        if VERBOSE { println!("!T {} <-> {}", Symbol::T(sk).to_str(self.get_symbol_table()), stream_sym.to_str(self.get_symbol_table())); }
698                        if sk == sr {
699                            recover_mode = false;
700                            let pos_str = if let Some(Pos(line, col)) = stream_pos { format!(", line {line}, col {col}") } else { String::new() };
701                            wrapper.get_log_mut().add_note(format!("resynchronized on '{}'{pos_str}",
702                                                                   stream_sym.to_str(self.get_symbol_table())));
703                            if VERBOSE { println!("(recovering) resynchronized{pos_str}"); }
704                        } else {
705                            if VERBOSE { println!("(recovering) popping {}", Symbol::T(sk).to_str(self.get_symbol_table())); }
706                            stack_sym = stack.pop().unwrap();
707                        }
708                    }
709                    if !recover_mode {
710                        if VERBOSE { println!("- MATCH {}", stream_sym.to_str(sym_table)); }
711                        if self.symbol_table.is_token_data(sk) {
712                            stack_t.push(std::mem::take(&mut stream_str)); // must use take() to comply with borrow checker
713                        }
714                        stack_sym = stack.pop().unwrap();
715                        wrapper.push_span(stream_span.take());
716                        advance_stream = true;
717                    }
718                }
719                (OpCode::End, Symbol::End) => {
720                    if nbr_recovers == 0 {
721                        wrapper.switch(Call::End(Terminate::None), 0, 0, None);
722                    }
723                    break;
724                }
725                (OpCode::End, _) => {
726                    wrapper.get_log_mut()
727                        .add_error(format!("syntax error: found extra symbol '{}' after end of parsing", stream_sym.to_str(sym_table)));
728                    wrapper.abort();
729                    return Err(ParserError::ExtraSymbol);
730                }
731                (_, Symbol::End) => {
732                    wrapper.get_log_mut()
733                        .add_error(format!("syntax error: found end of stream instead of '{}'", stack_sym.to_str_name(sym_table)));
734                    wrapper.abort();
735                    return Err(ParserError::UnexpectedEOS);
736                }
737                (_, _) => {
738                    wrapper.get_log_mut()
739                        .add_error(format!(
740                            "unexpected syntax error: input '{}' while expecting '{}'{}",
741                            stream_sym.to_str(sym_table), stack_sym.to_str_name(sym_table),
742                            if let Some(Pos(line, col)) = stream_pos { format!(", line {line}, col {col}") } else { String::new() }));
743                    wrapper.abort();
744                    return Err(ParserError::UnexpectedError);
745                }
746            }
747            match wrapper.check_abort_request() {
748                Terminate::None => {}
749                terminate @ (Terminate::Abort | Terminate::Conclude) => {
750                    if VERBOSE { println!("detected {terminate:?}"); }
751                    stack_t.clear();
752                    stack.clear();
753                    wrapper.abort();
754                    if nbr_recovers == 0 {
755                        wrapper.switch(Call::End(terminate), 0, 0, None);
756                    }
757                    if terminate == Terminate::Abort {
758                        return Err(ParserError::AbortRequest);
759                    } else {
760                        break;
761                    }
762                }
763            }
764        }
765        assert!(stack_t.is_empty(), "stack_t: {}", stack_t.join(", "));
766        assert!(stack.is_empty(), "stack: {}", stack.iter().map(|s| s.to_str(sym_table)).collect::<Vec<_>>().join(", "));
767        if nbr_recovers == 0 {
768            assert!(wrapper.is_stack_empty(), "symbol stack isn't empty");
769            assert!(wrapper.is_stack_t_empty(), "text stack isn't empty");
770            assert!(wrapper.is_stack_span_empty(), "span stack isn't empty");
771            Ok(())
772        } else {
773            // when nbr_recovers > 0, we know that at least one error has been reported to the log, no need to add one here
774            wrapper.abort();
775            Err(ParserError::EncounteredErrors)
776        }
777    }
778}
779
/// Read-only accessors to the parser's internal tables.
///
/// Only compiled when the `test_utils` feature is enabled.
#[cfg(feature = "test_utils")]
impl Parser<'_> {
    /// Returns the list of alternatives held by the parser.
    ///
    /// The parser looks entries up by alternative ID (`alt_id`).
    pub fn get_alts(&self) -> &Vec<Alternative> {
        &self.alts
    }

    /// Returns the opcode table: one opcode sequence per alternative ID.
    ///
    /// The parser extends its stack with the sequence of the selected alternative.
    pub fn get_opcodes(&self) -> &Vec<Vec<OpCode>> {
        &self.opcodes
    }

    /// Returns the table that maps each alternative ID to the ID of its nonterminal.
    pub fn get_alt_var(&self) -> &[VarId] {
        self.alt_var
    }
}