lady_deirdre/syntax/
error.rs

1////////////////////////////////////////////////////////////////////////////////
2// This file is part of "Lady Deirdre", a compiler front-end foundation       //
3// technology.                                                                //
4//                                                                            //
5// This work is proprietary software with source-available code.              //
6//                                                                            //
7// To copy, use, distribute, or contribute to this work, you must agree to    //
8// the terms of the General License Agreement:                                //
9//                                                                            //
10// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md           //
11//                                                                            //
12// The agreement grants a Basic Commercial License, allowing you to use       //
13// this work in non-commercial and limited commercial products with a total   //
14// gross revenue cap. To remove this commercial limit for one of your         //
15// products, you must acquire a Full Commercial License.                      //
16//                                                                            //
17// If you contribute to the source code, documentation, or related materials, //
18// you must grant me an exclusive license to these contributions.             //
19// Contributions are governed by the "Contributions" section of the General   //
20// License Agreement.                                                         //
21//                                                                            //
22// Copying the work in parts is strictly forbidden, except as permitted       //
23// under the General License Agreement.                                       //
24//                                                                            //
25// If you do not or cannot agree to the terms of this Agreement,              //
26// do not use this work.                                                      //
27//                                                                            //
28// This work is provided "as is", without any warranties, express or implied, //
29// except where such disclaimers are legally invalid.                         //
30//                                                                            //
31// Copyright (c) 2024 Ilya Lakhin (Илья Александрович Лахин).                 //
32// All rights reserved.                                                       //
33////////////////////////////////////////////////////////////////////////////////
34
35use std::{
36    borrow::Cow,
37    cmp::Ordering,
38    collections::HashSet,
39    fmt::{Debug, Display, Formatter},
40    marker::PhantomData,
41};
42
43use crate::{
44    arena::{Entry, Id, Identifiable},
45    format::{AnnotationPriority, SnippetFormatter},
46    lexis::{
47        Length,
48        SiteRefSpan,
49        SourceCode,
50        ToSite,
51        ToSpan,
52        Token,
53        TokenRef,
54        TokenRule,
55        TokenSet,
56    },
57    syntax::{AbstractNode, Node, NodeRule, NodeSet, RecoveryResult, SyntaxTree, ROOT_RULE},
58    units::CompilationUnit,
59};
60
61/// An [ErrorRef] reference that does not point to any syntax error.
62///
63/// The value of this static equals to the [ErrorRef::nil] value.
64pub static NIL_ERROR_REF: ErrorRef = ErrorRef::nil();
65
66/// A syntax error that may occur during the parsing process.
67///
68/// In Lady Deirdre syntax parsing is an
69/// [infallible](crate::syntax::SyntaxSession#parsing-algorithm-considerations)
70/// process. Hence, the syntax error object represents a report of the parser's
71/// error recovery attempt.
72#[derive(Clone, PartialEq, Eq, Debug)]
73pub struct SyntaxError {
74    /// A [span of tokens](SiteRefSpan) where the error occurred.
75    pub span: SiteRefSpan,
76
77    /// A parsing rule that reported the error.
78    pub context: NodeRule,
79
80    /// A type of the recovery strategy that has been applied.
81    pub recovery: RecoveryResult,
82
83    /// A set of tokens that the parser expected in the [span](Self::span).
84    pub expected_tokens: &'static TokenSet,
85
86    /// A set of nodes that the parser expected in the [span](Self::span).
87    pub expected_nodes: &'static NodeSet,
88}
89
90impl SyntaxError {
91    /// Returns a displayable object that prints a canonical title of
92    /// this syntax error.
93    #[inline(always)]
94    pub fn title<N: AbstractNode>(&self) -> impl Display + '_ {
95        struct Title<'error, N> {
96            error: &'error SyntaxError,
97            _node: PhantomData<N>,
98        }
99
100        impl<'error, N: Node> Debug for Title<'error, N> {
101            #[inline(always)]
102            fn fmt(&self, formatter: &mut Formatter) -> std::fmt::Result {
103                Display::fmt(self, formatter)
104            }
105        }
106
107        impl<'error, N: AbstractNode> Display for Title<'error, N> {
108            #[inline(always)]
109            fn fmt(&self, formatter: &mut Formatter) -> std::fmt::Result {
110                match N::rule_description(self.error.context, true) {
111                    Some(context) => formatter.write_fmt(format_args!("{context} syntax error.")),
112                    None => formatter.write_str("Syntax error."),
113                }
114            }
115        }
116
117        Title {
118            error: self,
119            _node: PhantomData::<N>,
120        }
121    }
122
123    /// Returns a displayable object that prints a canonical message of
124    /// this syntax error.
125    ///
126    /// The `code` parameter provides access to the compilation unit's tokens
127    /// of where the error occurred.
128    #[inline(always)]
129    pub fn message<N: Node>(
130        &self,
131        code: &impl SourceCode<Token = <N as Node>::Token>,
132    ) -> impl Debug + Display + '_ {
133        struct Message<'error, N> {
134            error: &'error SyntaxError,
135            empty_span: bool,
136            _node: PhantomData<N>,
137        }
138
139        impl<'error, N: Node> Debug for Message<'error, N> {
140            #[inline(always)]
141            fn fmt(&self, formatter: &mut Formatter) -> std::fmt::Result {
142                Display::fmt(self, formatter)
143            }
144        }
145
146        impl<'error, N: Node> Display for Message<'error, N> {
147            fn fmt(&self, formatter: &mut Formatter) -> std::fmt::Result {
148                const LENGTH_MAX: Length = 80;
149
150                #[derive(PartialEq, Eq)]
151                enum TokenOrNode {
152                    Token(Cow<'static, str>),
153                    Node(Cow<'static, str>),
154                }
155
156                impl PartialOrd for TokenOrNode {
157                    #[inline(always)]
158                    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
159                        Some(self.cmp(other))
160                    }
161                }
162
163                impl Ord for TokenOrNode {
164                    fn cmp(&self, other: &Self) -> Ordering {
165                        match (self, other) {
166                            (Self::Token(_), Self::Node(_)) => Ordering::Greater,
167                            (Self::Node(_), Self::Token(_)) => Ordering::Less,
168                            (Self::Token(this), Self::Token(other)) => this.cmp(other),
169                            (Self::Node(this), Self::Node(other)) => this.cmp(other),
170                        }
171                    }
172                }
173
174                impl TokenOrNode {
175                    #[inline(always)]
176                    fn print_to(&self, target: &mut String) {
177                        match self {
178                            Self::Node(string) => target.push_str(string.as_ref()),
179
180                            Self::Token(string) => {
181                                target.push('\'');
182                                target.push_str(string.as_ref());
183                                target.push('\'');
184                            }
185                        }
186                    }
187                }
188
189                struct OutString {
190                    alt: bool,
191                    set: HashSet<&'static str>,
192                    empty_span: bool,
193                    context: Cow<'static, str>,
194                    recovery: RecoveryResult,
195                    components: Vec<TokenOrNode>,
196                    exhaustive: bool,
197                }
198
199                impl OutString {
200                    fn new<N: Node>(
201                        alt: bool,
202                        capacity: usize,
203                        empty_span: bool,
204                        context: NodeRule,
205                        recovery: RecoveryResult,
206                    ) -> Self {
207                        let set = HashSet::with_capacity(capacity);
208
209                        let context = N::rule_description(context, true)
210                            .filter(|_| context != ROOT_RULE)
211                            .map(Cow::Borrowed)
212                            .unwrap_or(Cow::Borrowed(""));
213
214                        Self {
215                            alt,
216                            set,
217                            empty_span,
218                            context,
219                            recovery,
220                            components: Vec::with_capacity(capacity),
221                            exhaustive: true,
222                        }
223                    }
224
225                    fn push_token<N: Node>(&mut self, rule: TokenRule) {
226                        let description = match <N as Node>::Token::rule_description(rule, self.alt)
227                        {
228                            Some(string) => string,
229                            None => return,
230                        };
231
232                        if self.set.insert(description) {
233                            self.components
234                                .push(TokenOrNode::Token(Cow::Borrowed(description)));
235                        }
236                    }
237
238                    fn push_node<N: Node>(&mut self, rule: NodeRule) {
239                        let description = match N::rule_description(rule, self.alt) {
240                            Some(string) => string,
241                            None => return,
242                        };
243
244                        if self.set.insert(description) {
245                            self.components
246                                .push(TokenOrNode::Node(Cow::Borrowed(description)));
247                        }
248                    }
249
250                    fn shorten(&mut self) -> bool {
251                        if self.alt {
252                            return false;
253                        }
254
255                        if self.components.len() <= 2 {
256                            return false;
257                        }
258
259                        let _ = self.components.pop();
260                        self.exhaustive = false;
261
262                        true
263                    }
264
265                    #[inline(always)]
266                    fn missing_str(&self) -> &'static str {
267                        static STRING: &'static str = "missing";
268                        static ALT_STR: &'static str = "Missing";
269
270                        match self.alt {
271                            false => STRING,
272                            true => ALT_STR,
273                        }
274                    }
275
276                    #[inline(always)]
277                    fn unexpected_str(&self) -> &'static str {
278                        static STRING: &'static str = "unexpected input";
279                        static ALT_STR: &'static str = "Unexpected input";
280
281                        match self.alt {
282                            false => STRING,
283                            true => ALT_STR,
284                        }
285                    }
286
287                    #[inline(always)]
288                    fn in_str(&self) -> &'static str {
289                        static STRING: &'static str = " in ";
290                        static ALT_STR: &'static str = " in ";
291
292                        match self.alt {
293                            false => STRING,
294                            true => ALT_STR,
295                        }
296                    }
297
298                    #[inline(always)]
299                    fn eoi_str(&self) -> &'static str {
300                        static STRING: &'static str = "unexpected end of input";
301                        static ALT_STR: &'static str = "Unexpected end of input";
302
303                        match self.alt {
304                            false => STRING,
305                            true => ALT_STR,
306                        }
307                    }
308
309                    #[inline(always)]
310                    fn or_str(&self) -> &'static str {
311                        static STRING: &'static str = " or ";
312
313                        STRING
314                    }
315
316                    #[inline(always)]
317                    fn comma_str(&self) -> &'static str {
318                        static STRING: &'static str = ", ";
319
320                        STRING
321                    }
322
323                    #[inline(always)]
324                    fn etc_str(&self) -> &'static str {
325                        static STRING: &'static str = "…";
326                        static ALT_STR: &'static str = "...";
327
328                        match self.alt {
329                            false => STRING,
330                            true => ALT_STR,
331                        }
332                    }
333
334                    fn string(&self) -> String {
335                        let mut result = String::new();
336
337                        let print_components;
338
339                        match self.recovery {
340                            RecoveryResult::InsertRecover => {
341                                result.push_str(self.missing_str());
342                                print_components = true;
343                            }
344
345                            RecoveryResult::PanicRecover if self.empty_span => {
346                                result.push_str(self.missing_str());
347                                print_components = true;
348                            }
349
350                            RecoveryResult::PanicRecover => {
351                                result.push_str(self.unexpected_str());
352                                print_components = false;
353                            }
354
355                            RecoveryResult::UnexpectedEOI => {
356                                result.push_str(self.eoi_str());
357                                print_components = false;
358                            }
359
360                            RecoveryResult::UnexpectedToken => {
361                                result.push_str(self.missing_str());
362                                print_components = true;
363                            }
364                        };
365
366                        if print_components {
367                            let mut is_first = true;
368
369                            for component in &self.components {
370                                match is_first {
371                                    true => {
372                                        result.push(' ');
373                                        is_first = false;
374                                    }
375                                    false => match self.components.len() == 2 && self.exhaustive {
376                                        true => result.push_str(self.or_str()),
377                                        false => result.push_str(self.comma_str()),
378                                    },
379                                }
380
381                                component.print_to(&mut result);
382                            }
383                        }
384
385                        match self.exhaustive {
386                            false => {
387                                result.push_str(self.etc_str());
388                            }
389
390                            true => {
391                                if !self.context.is_empty() {
392                                    result.push_str(self.in_str());
393                                    result.push_str(self.context.as_ref());
394                                }
395
396                                if self.alt {
397                                    result.push('.');
398                                }
399                            }
400                        }
401
402                        result
403                    }
404                }
405
406                let mut out = OutString::new::<N>(
407                    formatter.alternate(),
408                    self.error.expected_tokens.len() + self.error.expected_nodes.len(),
409                    self.empty_span,
410                    self.error.context,
411                    self.error.recovery,
412                );
413
414                for rule in self.error.expected_nodes {
415                    out.push_node::<N>(rule);
416                }
417
418                for rule in self.error.expected_tokens {
419                    out.push_token::<N>(rule);
420                }
421
422                out.components.sort();
423
424                let mut string = out.string();
425
426                while string.chars().count() > LENGTH_MAX {
427                    if !out.shorten() {
428                        break;
429                    }
430
431                    string = out.string();
432                }
433
434                formatter.write_str(string.as_ref())
435            }
436        }
437
438        let span = self.aligned_span(code);
439
440        Message {
441            error: self,
442            empty_span: span.start == span.end,
443            _node: PhantomData::<N>,
444        }
445    }
446
447    /// Returns a displayable object that prints a
448    /// [Snippet](crate::format::Snippet) that annotates the source code span
449    /// with an error message.
450    ///
451    /// The `unit` parameter provides access to the compilation unit of where
452    /// the error occurred.
453    #[inline(always)]
454    pub fn display<'a>(&'a self, unit: &'a impl CompilationUnit) -> impl Debug + Display + 'a {
455        struct DisplaySyntaxError<'a, U: CompilationUnit> {
456            error: &'a SyntaxError,
457            unit: &'a U,
458        }
459
460        impl<'a, U: CompilationUnit> Debug for DisplaySyntaxError<'a, U> {
461            #[inline(always)]
462            fn fmt(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result {
463                Display::fmt(self, formatter)
464            }
465        }
466
467        impl<'a, U: CompilationUnit> Display for DisplaySyntaxError<'a, U> {
468            #[inline]
469            fn fmt(&self, formatter: &mut Formatter) -> std::fmt::Result {
470                let aligned_span = self.error.aligned_span(self.unit);
471
472                if !formatter.alternate() {
473                    formatter.write_fmt(format_args!("{}", aligned_span.display(self.unit)))?;
474                    formatter.write_str(": ")?;
475                    formatter.write_fmt(format_args!(
476                        "{:#}",
477                        self.error.message::<U::Node>(self.unit)
478                    ))?;
479
480                    return Ok(());
481                }
482
483                formatter
484                    .snippet(self.unit)
485                    .set_caption(format!("Unit({})", self.unit.id()))
486                    .set_summary(self.error.title::<U::Node>().to_string())
487                    .annotate(
488                        aligned_span,
489                        AnnotationPriority::Primary,
490                        format!("{}", self.error.message::<U::Node>(self.unit)),
491                    )
492                    .finish()
493            }
494        }
495
496        DisplaySyntaxError { error: self, unit }
497    }
498
499    /// Computes a [token span](SiteRefSpan) from the syntax error's original
500    /// span such that the new span would be properly aligned in regards to
501    /// the whitespaces and the line breaks surrounding the original span.
502    ///
503    /// The `code` parameter provides access to the compilation unit's tokens
504    /// of where the error occurred.
505    ///
506    /// The exact details of the underlying algorithm are not specified,
507    /// and the algorithm is subject to improvements over time in the minor
508    /// versions of this crate, but the function attempts to generate a span
509    /// that would better fit for the end-user facing rather than the original
510    /// machine-generated span.
511    #[inline(always)]
512    pub fn aligned_span(&self, code: &impl SourceCode) -> SiteRefSpan {
513        match self.recovery {
514            RecoveryResult::InsertRecover => self.widen_span(code),
515            _ => self.shorten_span(code),
516        }
517    }
518
519    fn widen_span(&self, code: &impl SourceCode) -> SiteRefSpan {
520        if !self.span.is_valid_span(code) {
521            return self.span.clone();
522        }
523
524        let mut start = self.span.start;
525        let mut end = self.span.end;
526
527        loop {
528            let previous = start.prev(code);
529
530            if previous == start {
531                break;
532            }
533
534            if !previous.is_valid_site(code) {
535                break;
536            }
537
538            if !Self::is_blank(code, previous.token_ref()) {
539                break;
540            }
541
542            start = previous;
543        }
544
545        loop {
546            if !Self::is_blank(code, end.token_ref()) {
547                break;
548            }
549
550            let next = end.next(code);
551
552            if next == end {
553                break;
554            }
555
556            if !end.is_valid_site(code) {
557                break;
558            }
559
560            end = next;
561        }
562
563        start..end
564    }
565
566    fn shorten_span(&self, code: &impl SourceCode) -> SiteRefSpan {
567        if !self.span.is_valid_span(code) {
568            return self.span.clone();
569        }
570
571        let mut start = self.span.start;
572        let mut end = self.span.end;
573
574        while start != end {
575            let previous = end.prev(code);
576
577            if previous == end {
578                break;
579            }
580
581            if !previous.is_valid_site(code) {
582                break;
583            }
584
585            if !Self::is_blank(code, previous.token_ref()) {
586                break;
587            }
588
589            end = previous;
590        }
591
592        while start != end {
593            if !Self::is_blank(code, start.token_ref()) {
594                break;
595            }
596
597            let next = start.next(code);
598
599            if next == start {
600                break;
601            }
602
603            if !next.is_valid_site(code) {
604                break;
605            }
606
607            start = next;
608        }
609
610        if start == end {
611            let mut site_ref = self.span.start;
612
613            loop {
614                let previous = site_ref.prev(code);
615
616                if previous == site_ref {
617                    break;
618                }
619
620                if !previous.is_valid_site(code) {
621                    break;
622                }
623
624                if !Self::is_blank(code, previous.token_ref()) {
625                    break;
626                }
627
628                site_ref = previous;
629            }
630
631            start = site_ref;
632            end = site_ref;
633        }
634
635        start..end
636    }
637
638    #[inline(always)]
639    fn is_blank(code: &impl SourceCode, token_ref: &TokenRef) -> bool {
640        let Some(string) = token_ref.string(code) else {
641            return false;
642        };
643
644        string
645            .as_bytes()
646            .iter()
647            .all(|&ch| ch == b' ' || ch == b'\t' || ch == b'\r' || ch == b'\n' || ch == b'\x0c')
648    }
649}
650
651/// A globally unique reference of the [syntax error](SyntaxError) in the
652/// syntax tree.
653///
654/// Each [syntax tree's](SyntaxTree) syntax error could be uniquely
655/// addressed within a pair of the [Id] and [Entry], where the identifier
656/// uniquely addresses a specific compilation unit instance (syntax tree), and
657/// the entry part addresses a syntax error within this tree.
658///
659/// Essentially, ErrorRef is a composite index.
660///
661/// Both components of this index form a unique pair
662/// (within the current process), because each compilation unit has a unique
663/// identifier, and the syntax errors within the syntax tree always receive
664/// unique [Entry] indices within the syntax tree.
665///
666/// If the syntax error instance has been removed from the syntax tree
667/// over time, new syntax error within this syntax tree will never occupy
668/// the same ErrorRef object, but the ErrorRef referred to the removed
669/// SyntaxError would become _invalid_.
670///
671/// The [nil](ErrorRef::nil) ErrorRefs are special references that are
672/// considered to be always invalid (they intentionally don't refer
673/// to any syntax error within any syntax tree).
674///
675/// Two distinct instances of the nil ErrorRef are always equal.
676#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
677pub struct ErrorRef {
678    /// An identifier of the syntax tree.
679    pub id: Id,
680
681    /// A versioned index of the [syntax error](SyntaxError) instance
682    /// within the syntax tree.
683    pub entry: Entry,
684}
685
686impl Debug for ErrorRef {
687    #[inline]
688    fn fmt(&self, formatter: &mut Formatter) -> std::fmt::Result {
689        match self.is_nil() {
690            false => formatter.write_fmt(format_args!(
691                "ErrorRef(id: {:?}, entry: {:?})",
692                self.id, self.entry,
693            )),
694            true => formatter.write_str("ErrorRef(Nil)"),
695        }
696    }
697}
698
699impl Identifiable for ErrorRef {
700    #[inline(always)]
701    fn id(&self) -> Id {
702        self.id
703    }
704}
705
706impl ErrorRef {
707    /// Returns an ErrorRef that intentionally does not refer
708    /// to any syntax error within any syntax tree.
709    ///
710    /// If you need just a static reference to the nil ErrorRef, use
711    /// the predefined [NIL_ERROR_REF] static.
712    #[inline(always)]
713    pub const fn nil() -> Self {
714        Self {
715            id: Id::nil(),
716            entry: Entry::nil(),
717        }
718    }
719
720    /// Returns true, if the underlying reference intentionally does not refer
721    /// to any syntax error within any syntax tree.
722    #[inline(always)]
723    pub const fn is_nil(&self) -> bool {
724        self.id.is_nil() || self.entry.is_nil()
725    }
726
727    /// Immutably borrows a syntax tree's syntax error referred to by
728    /// this ErrorRef.
729    ///
730    /// Returns None if this ErrorRef is not valid for specified `tree`.
731    #[inline(always)]
732    pub fn deref<'tree, N: Node>(
733        &self,
734        tree: &'tree impl SyntaxTree<Node = N>,
735    ) -> Option<&'tree SyntaxError> {
736        if self.id != tree.id() {
737            return None;
738        }
739
740        tree.get_error(&self.entry)
741    }
742
743    /// Returns true if the syntax error referred to by this ErrorRef exists in
744    /// the specified `tree`.
745    #[inline(always)]
746    pub fn is_valid_ref(&self, tree: &impl SyntaxTree) -> bool {
747        if self.id != tree.id() {
748            return false;
749        }
750
751        tree.has_error(&self.entry)
752    }
753}