Skip to main content

dbt_antlr4/
error_strategy.rs

1//! Error handling and recovery
2use std::borrow::Borrow;
3use std::error::Error;
4use std::fmt;
5use std::fmt::{Display, Formatter};
6use std::marker::PhantomData;
7use std::ops::DerefMut;
8use std::sync::Arc;
9
10use crate::atn_simulator::IATNSimulator;
11use crate::atn_state::*;
12use crate::char_stream::{CharStream, InputData};
13use crate::dfa::ScopeExt;
14use crate::errors::{
15    ANTLRError, ANTLRErrorKind, FailedPredicateError, InputMisMatchError, NoViableAltError,
16};
17use crate::interval_set::IntervalSet;
18use crate::parser::Parser;
19use crate::rule_context::RuleContext as _;
20use crate::token::{Token, TOKEN_DEFAULT_CHANNEL, TOKEN_EOF, TOKEN_EPSILON, TOKEN_INVALID_TYPE};
21use crate::token_factory::TokenFactory;
22use crate::transition::RuleTransition;
23use crate::tree::{RuleNode, Tree as _};
24use crate::utils::escape_whitespaces;
25
/// The interface for defining strategies to deal with syntax errors encountered
/// during a parse by ANTLR-generated parsers. We distinguish between three
/// different kinds of errors:
///  - The parser could not figure out which path to take in the ATN (none of
///    the available alternatives could possibly match)
///  - The current input does not match what we were looking for
///  - A predicate evaluated to false
///
/// Implementations of this interface should report syntax errors by calling
/// `notify_error_listeners` on the parser.
pub trait ErrorStrategy<'input, 'arena, TF, P>
where
    'input: 'arena,
    TF: TokenFactory<'input, 'arena> + 'arena,
    P: Parser<'input, 'arena, TF>,
{
    /// Reset the error handler state for the specified `recognizer`.
    fn reset(&mut self, recognizer: &mut P);

    /// This method is called when an unexpected symbol is encountered during an
    /// inline match operation in the parser. If the error strategy successfully
    /// recovers from the match failure, this method returns the token which
    /// should be treated as the successful result of the match.
    ///
    /// This method handles the consumption of any tokens - the caller should
    /// **not** call `consume` on the parser after a successful recovery.
    ///
    /// Note that the calling code will not report an error if this method
    /// returns successfully. The error strategy implementation is responsible
    /// for calling `notify_error_listeners` on the parser as appropriate.
    ///
    /// # Errors
    /// Returns an `ANTLRError` if it cannot recover from the unexpected input
    /// symbol.
    fn recover_inline(&mut self, recognizer: &mut P) -> Result<&'arena TF::Tok, ANTLRError>;

    /// This method is called to recover from error `e`. This method is
    /// called after [`ErrorStrategy::report_error`] by the default error handler
    /// generated for a rule method.
    fn recover(&mut self, recognizer: &mut P, e: &ANTLRError) -> Result<(), ANTLRError>;

    /// This method provides the error handler with an opportunity to handle
    /// syntactic or semantic errors in the input stream before they result in
    /// an error.
    ///
    /// The generated code currently contains calls to [`ErrorStrategy::sync`]
    /// after entering the decision state of a closure block (`(...)*` or
    /// `(...)+`).
    fn sync(&mut self, recognizer: &mut P) -> Result<(), ANTLRError>;

    /// Tests whether or not `recognizer` is in the process of recovering
    /// from an error. In error recovery mode, `Parser::consume` will create
    /// `ErrorNode` leaf instead of `TerminalNode` one
    fn in_error_recovery_mode(&mut self, recognizer: &mut P) -> bool;

    /// Report any kind of `ANTLRError`. This method is called by
    /// the default exception handler generated for a rule method.
    fn report_error(&mut self, recognizer: &mut P, e: &ANTLRError);

    /// This method is called when the parser successfully matches an input
    /// symbol.
    fn report_match(&mut self, recognizer: &mut P);
}
91
92impl<'input, 'arena, TF, P> ErrorStrategy<'input, 'arena, TF, P>
93    for Box<dyn ErrorStrategy<'input, 'arena, TF, P> + 'arena>
94where
95    'input: 'arena,
96    TF: TokenFactory<'input, 'arena> + 'arena,
97    P: Parser<'input, 'arena, TF>,
98{
99    #[inline(always)]
100    fn reset(&mut self, recognizer: &mut P) {
101        self.deref_mut().reset(recognizer)
102    }
103
104    #[inline(always)]
105    fn recover_inline(&mut self, recognizer: &mut P) -> Result<&'arena TF::Tok, ANTLRError> {
106        self.deref_mut().recover_inline(recognizer)
107    }
108
109    #[inline(always)]
110    fn recover(&mut self, recognizer: &mut P, e: &ANTLRError) -> Result<(), ANTLRError> {
111        self.deref_mut().recover(recognizer, e)
112    }
113
114    #[inline(always)]
115    fn sync(&mut self, recognizer: &mut P) -> Result<(), ANTLRError> {
116        self.deref_mut().sync(recognizer)
117    }
118
119    #[inline(always)]
120    fn in_error_recovery_mode(&mut self, recognizer: &mut P) -> bool {
121        self.deref_mut().in_error_recovery_mode(recognizer)
122    }
123
124    #[inline(always)]
125    fn report_error(&mut self, recognizer: &mut P, e: &ANTLRError) {
126        self.deref_mut().report_error(recognizer, e)
127    }
128
129    #[inline(always)]
130    fn report_match(&mut self, recognizer: &mut P) {
131        self.deref_mut().report_match(recognizer)
132    }
133}
134
/// This is the default implementation of `ErrorStrategy` used for
/// error reporting and recovery in ANTLR parsers.
#[derive(Debug)]
pub struct DefaultErrorStrategy<'input, 'arena, TF, P>
where
    'input: 'arena,
    TF: TokenFactory<'input, 'arena> + 'arena,
    P: Parser<'input, 'arena, TF>,
{
    /// Set by `begin_error_condition`, cleared by `end_error_condition`;
    /// while `true`, the reporting methods return without reporting.
    error_recovery_mode: bool,
    /// Input stream index stored by the most recent `recover` call;
    /// `-1` when nothing is recorded.
    last_error_index: isize,
    /// Parser states added by `recover`; `None` when nothing is recorded.
    last_error_states: Option<IntervalSet>,
    /// Parser state remembered by `sync` when the next-token set contains
    /// `TOKEN_EPSILON`; `ATNSTATE_INVALID_STATE_NUMBER` otherwise.
    next_tokens_state: i32,
    /// Context remembered by `sync` together with `next_tokens_state`;
    /// `recover_inline` uses both to build its input-mismatch error.
    next_tokens_ctx: Option<&'arena P::Node>,
    /// Zero-sized marker for the otherwise unused `TF` and `P` parameters.
    pd: PhantomData<(TF, P)>,
}
151
152impl<'input, 'arena, TF, P> Default for DefaultErrorStrategy<'input, 'arena, TF, P>
153where
154    'input: 'arena,
155    TF: TokenFactory<'input, 'arena> + 'arena,
156    P: Parser<'input, 'arena, TF>,
157{
158    fn default() -> Self {
159        Self::new()
160    }
161}
162
163impl<'input, 'arena, TF, P> DefaultErrorStrategy<'input, 'arena, TF, P>
164where
165    'input: 'arena,
166    TF: TokenFactory<'input, 'arena> + 'arena,
167    P: Parser<'input, 'arena, TF>,
168{
169    /// Creates new instance of `DefaultErrorStrategy`
170    pub fn new() -> Self {
171        Self {
172            error_recovery_mode: false,
173            last_error_index: -1,
174            last_error_states: None,
175            next_tokens_state: ATNSTATE_INVALID_STATE_NUMBER,
176            next_tokens_ctx: None,
177            pd: PhantomData,
178        }
179    }
180
    /// Enters error recovery mode by setting the `error_recovery_mode` flag.
    /// The recognizer argument is not used.
    fn begin_error_condition(&mut self, _recognizer: &P) {
        self.error_recovery_mode = true;
    }
184
185    fn end_error_condition(&mut self, _recognizer: &P) {
186        self.error_recovery_mode = false;
187        self.last_error_index = -1;
188        self.last_error_states = None;
189    }
190
191    fn report_no_viable_alternative(&self, recognizer: &mut P, e: &NoViableAltError) -> String {
192        let input = if e.start_token.token_type == TOKEN_EOF {
193            "<EOF>".to_owned()
194        } else {
195            recognizer.get_input_stream_mut().get_text_from_interval(
196                e.start_token.get_token_index(),
197                e.base.offending_token.get_token_index(),
198            )
199        };
200
201        format!("no viable alternative at input '{}'", input)
202    }
203
204    fn report_input_mismatch(&self, recognizer: &P, e: &InputMisMatchError) -> String {
205        format!(
206            "mismatched input {} expecting {}",
207            self.get_token_error_display(&e.base.offending_token),
208            e.base
209                .get_expected_tokens(recognizer)
210                .to_token_string(recognizer.get_vocabulary())
211        )
212    }
213
214    fn report_failed_predicate(&self, recognizer: &P, e: &FailedPredicateError) -> String {
215        format!(
216            "rule {} {}",
217            recognizer.get_rule_names()[recognizer.get_current_context().get_rule_index()],
218            e.base.message
219        )
220    }
221
222    fn report_unwanted_token(&mut self, recognizer: &mut P) {
223        if self.in_error_recovery_mode(recognizer) {
224            return;
225        }
226
227        self.begin_error_condition(recognizer);
228        let expecting = self.get_expected_tokens(recognizer);
229        let expecting = expecting.to_token_string(recognizer.get_vocabulary());
230        let t = recognizer.get_current_token().borrow();
231        let token_name = self.get_token_error_display(t);
232        let msg = format!("extraneous input {} expecting {}", token_name, expecting);
233        let t = t.get_token_index();
234        recognizer.notify_error_listeners(msg, Some(t), None);
235    }
236
237    fn report_missing_token(&mut self, recognizer: &mut P) {
238        if self.in_error_recovery_mode(recognizer) {
239            return;
240        }
241
242        self.begin_error_condition(recognizer);
243        let expecting = self.get_expected_tokens(recognizer);
244        let expecting = expecting.to_token_string(recognizer.get_vocabulary());
245        let t = recognizer.get_current_token().borrow();
246        let _token_name = self.get_token_error_display(t);
247        let msg = format!(
248            "missing {} at {}",
249            expecting,
250            self.get_token_error_display(t)
251        );
252        let t = t.get_token_index();
253        recognizer.notify_error_listeners(msg, Some(t), None);
254    }
255
256    fn single_token_insertion(&mut self, recognizer: &mut P) -> bool {
257        let current_token = recognizer.get_input_stream_mut().la(1);
258
259        let atn = recognizer.get_interpreter().atn();
260        let current_state = atn.get_state(recognizer.get_state());
261        let next = current_state
262            .get_transitions()
263            .first()
264            .unwrap()
265            .get_target();
266        let expect_at_ll2 = atn.next_tokens_in_ctx(next, Some(recognizer.get_current_context()));
267        if expect_at_ll2.contains(current_token) {
268            self.report_missing_token(recognizer);
269            return true;
270        }
271        false
272    }
273
274    fn single_token_deletion(
275        &mut self,
276        recognizer: &mut P,
277    ) -> Result<Option<&'arena TF::Tok>, ANTLRError> {
278        let next_token_type = recognizer.get_input_stream_mut().la(2);
279        let expecting = self.get_expected_tokens(recognizer);
280        //        println!("expecting {}", expecting.to_token_string(recognizer.get_vocabulary()));
281        if expecting.contains(next_token_type) {
282            self.report_unwanted_token(recognizer);
283            recognizer.consume(self)?;
284            self.report_match(recognizer);
285            let matched_symbol = recognizer.get_current_token();
286            return Ok(Some(matched_symbol));
287        }
288        Ok(None)
289    }
290
291    fn get_missing_symbol(&self, recognizer: &mut P) -> &'arena mut TF::Tok {
292        let expected = self.get_expected_tokens(recognizer);
293        let expected_token_type = expected.get_min().unwrap_or(TOKEN_INVALID_TYPE);
294        let token_text = if expected_token_type == TOKEN_EOF {
295            "<missing EOF>".to_owned()
296        } else {
297            format!(
298                "<missing {}>",
299                recognizer
300                    .get_vocabulary()
301                    .get_display_name(expected_token_type)
302            )
303        };
304
305        let mut curr = recognizer.get_current_token().borrow();
306        if curr.get_token_type() == TOKEN_EOF {
307            curr = recognizer
308                .get_input_stream()
309                .run(|it| it.get((it.index() - 1).max(0)).borrow());
310        }
311        let (line, column) = (curr.get_line(), curr.get_char_position_in_line());
312        recognizer.get_token_factory().create(
313            None::<&mut dyn CharStream>,
314            expected_token_type,
315            Some(token_text),
316            TOKEN_DEFAULT_CHANNEL,
317            -1,
318            -1,
319            line,
320            column,
321        )
322        // Token::to_owned(token.borrow())
323        // .modify_with(|it| it.text = token_text)
324    }
325
    /// Returns the recognizer's expected token set; a thin wrapper around
    /// the recognizer's `get_expected_tokens`.
    fn get_expected_tokens(&self, recognizer: &P) -> IntervalSet {
        recognizer.get_expected_tokens()
    }
329
330    fn get_token_error_display(&self, t: &dyn Token) -> String {
331        let text = t.get_text().to_display();
332        self.escape_ws_and_quote(&text)
333    }
334
335    fn escape_ws_and_quote(&self, s: &str) -> String {
336        format!("'{}'", escape_whitespaces(s, false))
337    }
338
339    fn get_error_recovery_set(&self, recognizer: &P) -> IntervalSet {
340        let atn = recognizer.get_interpreter().atn();
341        let mut ctx = Some(recognizer.get_current_context());
342        let mut recover_set = IntervalSet::new();
343        while let Some(c) = ctx {
344            if c.get_invoking_state() < 0 {
345                break;
346            }
347
348            let invoking_state = atn.get_state(c.get_invoking_state());
349            let tr = invoking_state.get_transitions().first().unwrap();
350            let tr = tr.try_as::<RuleTransition>().unwrap();
351            let follow = atn.next_tokens(&tr.follow_state);
352            recover_set.add_set(follow);
353            ctx = c.get_parent();
354        }
355        recover_set.remove_one(TOKEN_EPSILON);
356        recover_set
357    }
358
359    fn consume_until(&mut self, recognizer: &mut P, set: &IntervalSet) -> Result<(), ANTLRError> {
360        let mut ttype = recognizer.get_input_stream_mut().la(1);
361        while ttype != TOKEN_EOF && !set.contains(ttype) {
362            recognizer.consume(self)?;
363            ttype = recognizer.get_input_stream_mut().la(1);
364        }
365        Ok(())
366    }
367}
368
369impl<'input, 'arena, TF, P> ErrorStrategy<'input, 'arena, TF, P>
370    for DefaultErrorStrategy<'input, 'arena, TF, P>
371where
372    'input: 'arena,
373    TF: TokenFactory<'input, 'arena> + 'arena,
374    P: Parser<'input, 'arena, TF>,
375{
    /// Resets the strategy by ending any error condition: clears the
    /// recovery flag and the recorded error index and states.
    fn reset(&mut self, recognizer: &mut P) {
        self.end_error_condition(recognizer)
    }
379
380    fn recover_inline(&mut self, recognizer: &mut P) -> Result<&'arena TF::Tok, ANTLRError> {
381        let t = self
382            .single_token_deletion(recognizer)?
383            .map(|it| it.to_owned());
384        if let Some(t) = t {
385            recognizer.consume(self)?;
386            return Ok(t);
387        }
388
389        if self.single_token_insertion(recognizer) {
390            return Ok(self.get_missing_symbol(recognizer));
391        }
392
393        if let Some(next_tokens_ctx) = &self.next_tokens_ctx {
394            Err(ANTLRError::input_mismatch_with_state(
395                recognizer,
396                self.next_tokens_state,
397                next_tokens_ctx,
398            ))
399        } else {
400            Err(ANTLRError::input_mismatch(recognizer))
401        }
402        //        Err(ANTLRError::IllegalStateError("aaa".to_string()))
403    }
404
405    fn recover(&mut self, recognizer: &mut P, _e: &ANTLRError) -> Result<(), ANTLRError> {
406        if self.last_error_index == recognizer.get_input_stream_mut().index()
407            && self.last_error_states.is_some()
408            && self
409                .last_error_states
410                .as_ref()
411                .unwrap()
412                .contains(recognizer.get_state())
413        {
414            recognizer.consume(self)?;
415        }
416
417        self.last_error_index = recognizer.get_input_stream_mut().index();
418        self.last_error_states
419            .get_or_insert(IntervalSet::new())
420            .apply(|x| x.add_one(recognizer.get_state()));
421        let follow_set = self.get_error_recovery_set(recognizer);
422        self.consume_until(recognizer, &follow_set)?;
423        Ok(())
424    }
425
426    fn sync(&mut self, recognizer: &mut P) -> Result<(), ANTLRError> {
427        if self.in_error_recovery_mode(recognizer) {
428            return Ok(());
429        }
430        let next = recognizer.get_input_stream_mut().la(1);
431        let state = recognizer
432            .get_interpreter()
433            .atn()
434            .make_state_ref(recognizer.get_state());
435
436        let next_tokens = recognizer.get_interpreter().atn().next_tokens(&state);
437        //        println!("{:?}",next_tokens);
438
439        if next_tokens.contains(next) {
440            self.next_tokens_state = ATNSTATE_INVALID_STATE_NUMBER;
441            self.next_tokens_ctx = None;
442            return Ok(());
443        }
444
445        if next_tokens.contains(TOKEN_EPSILON) {
446            if self.next_tokens_ctx.is_none() {
447                self.next_tokens_state = recognizer.get_state();
448                self.next_tokens_ctx = Some(recognizer.get_current_context());
449            }
450            return Ok(());
451        }
452
453        match state.get_state_type_id() {
454            ATNSTATE_BLOCK_START
455            | ATNSTATE_PLUS_BLOCK_START
456            | ATNSTATE_STAR_BLOCK_START
457            | ATNSTATE_STAR_LOOP_ENTRY => {
458                if self.single_token_deletion(recognizer)?.is_none() {
459                    return Err(ANTLRError::input_mismatch(recognizer));
460                }
461            }
462            ATNSTATE_PLUS_LOOP_BACK | ATNSTATE_STAR_LOOP_BACK => {
463                self.report_unwanted_token(recognizer);
464                let mut expecting = recognizer.get_expected_tokens();
465                expecting.add_set(&self.get_error_recovery_set(recognizer));
466                self.consume_until(recognizer, &expecting)?;
467            }
468            _ => panic!("invalid ANTState type id"),
469        }
470
471        Ok(())
472    }
473
    /// Returns the `error_recovery_mode` flag. The recognizer argument is
    /// not used.
    fn in_error_recovery_mode(&mut self, _recognizer: &mut P) -> bool {
        self.error_recovery_mode
    }
477
478    fn report_error(&mut self, recognizer: &mut P, e: &ANTLRError) {
479        if self.in_error_recovery_mode(recognizer) {
480            return;
481        }
482
483        self.begin_error_condition(recognizer);
484        let msg = match e.as_ref() {
485            ANTLRErrorKind::NoAltError(e) => self.report_no_viable_alternative(recognizer, e),
486            ANTLRErrorKind::InputMismatchError(e) => self.report_input_mismatch(recognizer, e),
487            ANTLRErrorKind::PredicateError(e) => self.report_failed_predicate(recognizer, e),
488            _ => e.to_string(),
489        };
490        let offending_token_index = e.get_offending_token().map(|it| it.get_token_index());
491        recognizer.notify_error_listeners(msg, offending_token_index, Some(e))
492    }
493
    /// Called after a successful match; ends any error condition, which
    /// clears the recovery flag and the recorded error index and states.
    fn report_match(&mut self, recognizer: &mut P) {
        self.end_error_condition(recognizer);
    }
498}
499
/// This implementation of `ErrorStrategy` responds to syntax errors
/// by immediately cancelling the parse operation with a
/// [`ParseCancelledError`] wrapped in a fall-through `ANTLRError`. Before
/// bailing out, the error is recorded with `set_exception` on the current
/// context and on each of its ancestors.
///
/// This error strategy is useful in the following scenarios.
///
///  - Two-stage parsing: This error strategy allows the first stage of
///    two-stage parsing to immediately terminate if an error is encountered,
///    and immediately fall back to the second stage. In addition to avoiding
///    wasted work by attempting to recover from errors here, the empty
///    implementation of `sync` improves the performance of the first stage.
///  - Silent validation: When syntax errors are not being reported or logged,
///    and the parse result is simply ignored if errors occur, the
///    `BailErrorStrategy` avoids wasting work on recovering from errors when
///    the result will be ignored either way.
///
/// # Usage
/// ```ignore
/// use dbt_antlr4::error_strategy::BailErrorStrategy;
/// myparser.err_handler = BailErrorStrategy::new();
/// ```
#[derive(Default, Debug)]
pub struct BailErrorStrategy<'input, 'arena, TF, P>(DefaultErrorStrategy<'input, 'arena, TF, P>)
where
    'input: 'arena,
    TF: TokenFactory<'input, 'arena> + 'arena,
    P: Parser<'input, 'arena, TF>;
532
533impl<'input, 'arena, TF, P> BailErrorStrategy<'input, 'arena, TF, P>
534where
535    'input: 'arena,
536    TF: TokenFactory<'input, 'arena> + 'arena,
537    P: Parser<'input, 'arena, TF>,
538{
539    /// Creates new instance of `BailErrorStrategy`
540    pub fn new() -> Self {
541        Self(DefaultErrorStrategy::new())
542    }
543
544    fn process_error(&self, recognizer: &mut P, e: &ANTLRError) -> ANTLRError {
545        let mut ctx = recognizer.get_current_context();
546        let _: Option<()> = (|| loop {
547            ctx.set_exception(e.clone(), recognizer.get_arena());
548            ctx = ctx.get_parent()?
549        })();
550        ANTLRError::fall_through(Arc::new(ParseCancelledError(e.clone())))
551    }
552}
553
/// Error returned by `BailErrorStrategy`, wrapped in a fall-through
/// `ANTLRError`, to bail out of parsing. It holds the error that caused the
/// cancellation.
#[derive(Debug)]
pub struct ParseCancelledError(ANTLRError);
557
impl Error for ParseCancelledError {
    /// Exposes the wrapped `ANTLRError` as the underlying cause.
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        Some(&self.0)
    }
}
563
564impl Display for ParseCancelledError {
565    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
566        f.write_str("ParseCancelledError, caused by ")?;
567        self.0.fmt(f)
568    }
569}
570
571impl<'input, 'arena, TF, P> ErrorStrategy<'input, 'arena, TF, P>
572    for BailErrorStrategy<'input, 'arena, TF, P>
573where
574    'input: 'arena,
575    TF: TokenFactory<'input, 'arena> + 'arena,
576    P: Parser<'input, 'arena, TF>,
577{
578    #[inline(always)]
579    fn reset(&mut self, recognizer: &mut P) {
580        self.0.reset(recognizer)
581    }
582
583    #[cold]
584    fn recover_inline(&mut self, recognizer: &mut P) -> Result<&'arena TF::Tok, ANTLRError> {
585        let err = ANTLRError::input_mismatch(recognizer);
586
587        Err(self.process_error(recognizer, &err))
588    }
589
590    #[cold]
591    fn recover(&mut self, recognizer: &mut P, e: &ANTLRError) -> Result<(), ANTLRError> {
592        Err(self.process_error(recognizer, e))
593    }
594
595    #[inline(always)]
596    fn sync(&mut self, _recognizer: &mut P) -> Result<(), ANTLRError> {
597        /* empty */
598        Ok(())
599    }
600
601    #[inline(always)]
602    fn in_error_recovery_mode(&mut self, recognizer: &mut P) -> bool {
603        self.0.in_error_recovery_mode(recognizer)
604    }
605
606    #[inline(always)]
607    fn report_error(&mut self, recognizer: &mut P, e: &ANTLRError) {
608        self.0.report_error(recognizer, e)
609    }
610
611    #[inline(always)]
612    fn report_match(&mut self, _recognizer: &mut P) {}
613}