lady_deirdre/syntax/error.rs
1////////////////////////////////////////////////////////////////////////////////
2// This file is part of "Lady Deirdre", a compiler front-end foundation //
3// technology. //
4// //
5// This work is proprietary software with source-available code. //
6// //
7// To copy, use, distribute, or contribute to this work, you must agree to //
8// the terms of the General License Agreement: //
9// //
10// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md //
11// //
12// The agreement grants a Basic Commercial License, allowing you to use //
13// this work in non-commercial and limited commercial products with a total //
14// gross revenue cap. To remove this commercial limit for one of your //
15// products, you must acquire a Full Commercial License. //
16// //
17// If you contribute to the source code, documentation, or related materials, //
18// you must grant me an exclusive license to these contributions. //
19// Contributions are governed by the "Contributions" section of the General //
20// License Agreement. //
21// //
22// Copying the work in parts is strictly forbidden, except as permitted //
23// under the General License Agreement. //
24// //
25// If you do not or cannot agree to the terms of this Agreement, //
26// do not use this work. //
27// //
28// This work is provided "as is", without any warranties, express or implied, //
29// except where such disclaimers are legally invalid. //
30// //
31// Copyright (c) 2024 Ilya Lakhin (Илья Александрович Лахин). //
32// All rights reserved. //
33////////////////////////////////////////////////////////////////////////////////
34
35use std::{
36 borrow::Cow,
37 cmp::Ordering,
38 collections::HashSet,
39 fmt::{Debug, Display, Formatter},
40 marker::PhantomData,
41};
42
43use crate::{
44 arena::{Entry, Id, Identifiable},
45 format::{AnnotationPriority, SnippetFormatter},
46 lexis::{
47 Length,
48 SiteRefSpan,
49 SourceCode,
50 ToSite,
51 ToSpan,
52 Token,
53 TokenRef,
54 TokenRule,
55 TokenSet,
56 },
57 syntax::{AbstractNode, Node, NodeRule, NodeSet, RecoveryResult, SyntaxTree, ROOT_RULE},
58 units::CompilationUnit,
59};
60
61/// An [ErrorRef] reference that does not point to any syntax error.
62///
63/// The value of this static equals to the [ErrorRef::nil] value.
64pub static NIL_ERROR_REF: ErrorRef = ErrorRef::nil();
65
66/// A syntax error that may occur during the parsing process.
67///
68/// In Lady Deirdre syntax parsing is an
69/// [infallible](crate::syntax::SyntaxSession#parsing-algorithm-considerations)
70/// process. Hence, the syntax error object represents a report of the parser's
71/// error recovery attempt.
72#[derive(Clone, PartialEq, Eq, Debug)]
73pub struct SyntaxError {
74 /// A [span of tokens](SiteRefSpan) where the error occurred.
75 pub span: SiteRefSpan,
76
77 /// A parsing rule that reported the error.
78 pub context: NodeRule,
79
80 /// A type of the recovery strategy that has been applied.
81 pub recovery: RecoveryResult,
82
83 /// A set of tokens that the parser expected in the [span](Self::span).
84 pub expected_tokens: &'static TokenSet,
85
86 /// A set of nodes that the parser expected in the [span](Self::span).
87 pub expected_nodes: &'static NodeSet,
88}
89
90impl SyntaxError {
91 /// Returns a displayable object that prints a canonical title of
92 /// this syntax error.
93 #[inline(always)]
94 pub fn title<N: AbstractNode>(&self) -> impl Display + '_ {
95 struct Title<'error, N> {
96 error: &'error SyntaxError,
97 _node: PhantomData<N>,
98 }
99
100 impl<'error, N: Node> Debug for Title<'error, N> {
101 #[inline(always)]
102 fn fmt(&self, formatter: &mut Formatter) -> std::fmt::Result {
103 Display::fmt(self, formatter)
104 }
105 }
106
107 impl<'error, N: AbstractNode> Display for Title<'error, N> {
108 #[inline(always)]
109 fn fmt(&self, formatter: &mut Formatter) -> std::fmt::Result {
110 match N::rule_description(self.error.context, true) {
111 Some(context) => formatter.write_fmt(format_args!("{context} syntax error.")),
112 None => formatter.write_str("Syntax error."),
113 }
114 }
115 }
116
117 Title {
118 error: self,
119 _node: PhantomData::<N>,
120 }
121 }
122
123 /// Returns a displayable object that prints a canonical message of
124 /// this syntax error.
125 ///
126 /// The `code` parameter provides access to the compilation unit's tokens
127 /// of where the error occurred.
128 #[inline(always)]
129 pub fn message<N: Node>(
130 &self,
131 code: &impl SourceCode<Token = <N as Node>::Token>,
132 ) -> impl Debug + Display + '_ {
133 struct Message<'error, N> {
134 error: &'error SyntaxError,
135 empty_span: bool,
136 _node: PhantomData<N>,
137 }
138
139 impl<'error, N: Node> Debug for Message<'error, N> {
140 #[inline(always)]
141 fn fmt(&self, formatter: &mut Formatter) -> std::fmt::Result {
142 Display::fmt(self, formatter)
143 }
144 }
145
146 impl<'error, N: Node> Display for Message<'error, N> {
147 fn fmt(&self, formatter: &mut Formatter) -> std::fmt::Result {
148 const LENGTH_MAX: Length = 80;
149
150 #[derive(PartialEq, Eq)]
151 enum TokenOrNode {
152 Token(Cow<'static, str>),
153 Node(Cow<'static, str>),
154 }
155
156 impl PartialOrd for TokenOrNode {
157 #[inline(always)]
158 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
159 Some(self.cmp(other))
160 }
161 }
162
163 impl Ord for TokenOrNode {
164 fn cmp(&self, other: &Self) -> Ordering {
165 match (self, other) {
166 (Self::Token(_), Self::Node(_)) => Ordering::Greater,
167 (Self::Node(_), Self::Token(_)) => Ordering::Less,
168 (Self::Token(this), Self::Token(other)) => this.cmp(other),
169 (Self::Node(this), Self::Node(other)) => this.cmp(other),
170 }
171 }
172 }
173
174 impl TokenOrNode {
175 #[inline(always)]
176 fn print_to(&self, target: &mut String) {
177 match self {
178 Self::Node(string) => target.push_str(string.as_ref()),
179
180 Self::Token(string) => {
181 target.push('\'');
182 target.push_str(string.as_ref());
183 target.push('\'');
184 }
185 }
186 }
187 }
188
189 struct OutString {
190 alt: bool,
191 set: HashSet<&'static str>,
192 empty_span: bool,
193 context: Cow<'static, str>,
194 recovery: RecoveryResult,
195 components: Vec<TokenOrNode>,
196 exhaustive: bool,
197 }
198
199 impl OutString {
200 fn new<N: Node>(
201 alt: bool,
202 capacity: usize,
203 empty_span: bool,
204 context: NodeRule,
205 recovery: RecoveryResult,
206 ) -> Self {
207 let set = HashSet::with_capacity(capacity);
208
209 let context = N::rule_description(context, true)
210 .filter(|_| context != ROOT_RULE)
211 .map(Cow::Borrowed)
212 .unwrap_or(Cow::Borrowed(""));
213
214 Self {
215 alt,
216 set,
217 empty_span,
218 context,
219 recovery,
220 components: Vec::with_capacity(capacity),
221 exhaustive: true,
222 }
223 }
224
225 fn push_token<N: Node>(&mut self, rule: TokenRule) {
226 let description = match <N as Node>::Token::rule_description(rule, self.alt)
227 {
228 Some(string) => string,
229 None => return,
230 };
231
232 if self.set.insert(description) {
233 self.components
234 .push(TokenOrNode::Token(Cow::Borrowed(description)));
235 }
236 }
237
238 fn push_node<N: Node>(&mut self, rule: NodeRule) {
239 let description = match N::rule_description(rule, self.alt) {
240 Some(string) => string,
241 None => return,
242 };
243
244 if self.set.insert(description) {
245 self.components
246 .push(TokenOrNode::Node(Cow::Borrowed(description)));
247 }
248 }
249
250 fn shorten(&mut self) -> bool {
251 if self.alt {
252 return false;
253 }
254
255 if self.components.len() <= 2 {
256 return false;
257 }
258
259 let _ = self.components.pop();
260 self.exhaustive = false;
261
262 true
263 }
264
265 #[inline(always)]
266 fn missing_str(&self) -> &'static str {
267 static STRING: &'static str = "missing";
268 static ALT_STR: &'static str = "Missing";
269
270 match self.alt {
271 false => STRING,
272 true => ALT_STR,
273 }
274 }
275
276 #[inline(always)]
277 fn unexpected_str(&self) -> &'static str {
278 static STRING: &'static str = "unexpected input";
279 static ALT_STR: &'static str = "Unexpected input";
280
281 match self.alt {
282 false => STRING,
283 true => ALT_STR,
284 }
285 }
286
287 #[inline(always)]
288 fn in_str(&self) -> &'static str {
289 static STRING: &'static str = " in ";
290 static ALT_STR: &'static str = " in ";
291
292 match self.alt {
293 false => STRING,
294 true => ALT_STR,
295 }
296 }
297
298 #[inline(always)]
299 fn eoi_str(&self) -> &'static str {
300 static STRING: &'static str = "unexpected end of input";
301 static ALT_STR: &'static str = "Unexpected end of input";
302
303 match self.alt {
304 false => STRING,
305 true => ALT_STR,
306 }
307 }
308
309 #[inline(always)]
310 fn or_str(&self) -> &'static str {
311 static STRING: &'static str = " or ";
312
313 STRING
314 }
315
316 #[inline(always)]
317 fn comma_str(&self) -> &'static str {
318 static STRING: &'static str = ", ";
319
320 STRING
321 }
322
323 #[inline(always)]
324 fn etc_str(&self) -> &'static str {
325 static STRING: &'static str = "…";
326 static ALT_STR: &'static str = "...";
327
328 match self.alt {
329 false => STRING,
330 true => ALT_STR,
331 }
332 }
333
334 fn string(&self) -> String {
335 let mut result = String::new();
336
337 let print_components;
338
339 match self.recovery {
340 RecoveryResult::InsertRecover => {
341 result.push_str(self.missing_str());
342 print_components = true;
343 }
344
345 RecoveryResult::PanicRecover if self.empty_span => {
346 result.push_str(self.missing_str());
347 print_components = true;
348 }
349
350 RecoveryResult::PanicRecover => {
351 result.push_str(self.unexpected_str());
352 print_components = false;
353 }
354
355 RecoveryResult::UnexpectedEOI => {
356 result.push_str(self.eoi_str());
357 print_components = false;
358 }
359
360 RecoveryResult::UnexpectedToken => {
361 result.push_str(self.missing_str());
362 print_components = true;
363 }
364 };
365
366 if print_components {
367 let mut is_first = true;
368
369 for component in &self.components {
370 match is_first {
371 true => {
372 result.push(' ');
373 is_first = false;
374 }
375 false => match self.components.len() == 2 && self.exhaustive {
376 true => result.push_str(self.or_str()),
377 false => result.push_str(self.comma_str()),
378 },
379 }
380
381 component.print_to(&mut result);
382 }
383 }
384
385 match self.exhaustive {
386 false => {
387 result.push_str(self.etc_str());
388 }
389
390 true => {
391 if !self.context.is_empty() {
392 result.push_str(self.in_str());
393 result.push_str(self.context.as_ref());
394 }
395
396 if self.alt {
397 result.push('.');
398 }
399 }
400 }
401
402 result
403 }
404 }
405
406 let mut out = OutString::new::<N>(
407 formatter.alternate(),
408 self.error.expected_tokens.len() + self.error.expected_nodes.len(),
409 self.empty_span,
410 self.error.context,
411 self.error.recovery,
412 );
413
414 for rule in self.error.expected_nodes {
415 out.push_node::<N>(rule);
416 }
417
418 for rule in self.error.expected_tokens {
419 out.push_token::<N>(rule);
420 }
421
422 out.components.sort();
423
424 let mut string = out.string();
425
426 while string.chars().count() > LENGTH_MAX {
427 if !out.shorten() {
428 break;
429 }
430
431 string = out.string();
432 }
433
434 formatter.write_str(string.as_ref())
435 }
436 }
437
438 let span = self.aligned_span(code);
439
440 Message {
441 error: self,
442 empty_span: span.start == span.end,
443 _node: PhantomData::<N>,
444 }
445 }
446
447 /// Returns a displayable object that prints a
448 /// [Snippet](crate::format::Snippet) that annotates the source code span
449 /// with an error message.
450 ///
451 /// The `unit` parameter provides access to the compilation unit of where
452 /// the error occurred.
453 #[inline(always)]
454 pub fn display<'a>(&'a self, unit: &'a impl CompilationUnit) -> impl Debug + Display + 'a {
455 struct DisplaySyntaxError<'a, U: CompilationUnit> {
456 error: &'a SyntaxError,
457 unit: &'a U,
458 }
459
460 impl<'a, U: CompilationUnit> Debug for DisplaySyntaxError<'a, U> {
461 #[inline(always)]
462 fn fmt(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result {
463 Display::fmt(self, formatter)
464 }
465 }
466
467 impl<'a, U: CompilationUnit> Display for DisplaySyntaxError<'a, U> {
468 #[inline]
469 fn fmt(&self, formatter: &mut Formatter) -> std::fmt::Result {
470 let aligned_span = self.error.aligned_span(self.unit);
471
472 if !formatter.alternate() {
473 formatter.write_fmt(format_args!("{}", aligned_span.display(self.unit)))?;
474 formatter.write_str(": ")?;
475 formatter.write_fmt(format_args!(
476 "{:#}",
477 self.error.message::<U::Node>(self.unit)
478 ))?;
479
480 return Ok(());
481 }
482
483 formatter
484 .snippet(self.unit)
485 .set_caption(format!("Unit({})", self.unit.id()))
486 .set_summary(self.error.title::<U::Node>().to_string())
487 .annotate(
488 aligned_span,
489 AnnotationPriority::Primary,
490 format!("{}", self.error.message::<U::Node>(self.unit)),
491 )
492 .finish()
493 }
494 }
495
496 DisplaySyntaxError { error: self, unit }
497 }
498
499 /// Computes a [token span](SiteRefSpan) from the syntax error's original
500 /// span such that the new span would be properly aligned in regards to
501 /// the whitespaces and the line breaks surrounding the original span.
502 ///
503 /// The `code` parameter provides access to the compilation unit's tokens
504 /// of where the error occurred.
505 ///
506 /// The exact details of the underlying algorithm are not specified,
507 /// and the algorithm is subject to improvements over time in the minor
508 /// versions of this crate, but the function attempts to generate a span
509 /// that would better fit for the end-user facing rather than the original
510 /// machine-generated span.
511 #[inline(always)]
512 pub fn aligned_span(&self, code: &impl SourceCode) -> SiteRefSpan {
513 match self.recovery {
514 RecoveryResult::InsertRecover => self.widen_span(code),
515 _ => self.shorten_span(code),
516 }
517 }
518
519 fn widen_span(&self, code: &impl SourceCode) -> SiteRefSpan {
520 if !self.span.is_valid_span(code) {
521 return self.span.clone();
522 }
523
524 let mut start = self.span.start;
525 let mut end = self.span.end;
526
527 loop {
528 let previous = start.prev(code);
529
530 if previous == start {
531 break;
532 }
533
534 if !previous.is_valid_site(code) {
535 break;
536 }
537
538 if !Self::is_blank(code, previous.token_ref()) {
539 break;
540 }
541
542 start = previous;
543 }
544
545 loop {
546 if !Self::is_blank(code, end.token_ref()) {
547 break;
548 }
549
550 let next = end.next(code);
551
552 if next == end {
553 break;
554 }
555
556 if !end.is_valid_site(code) {
557 break;
558 }
559
560 end = next;
561 }
562
563 start..end
564 }
565
566 fn shorten_span(&self, code: &impl SourceCode) -> SiteRefSpan {
567 if !self.span.is_valid_span(code) {
568 return self.span.clone();
569 }
570
571 let mut start = self.span.start;
572 let mut end = self.span.end;
573
574 while start != end {
575 let previous = end.prev(code);
576
577 if previous == end {
578 break;
579 }
580
581 if !previous.is_valid_site(code) {
582 break;
583 }
584
585 if !Self::is_blank(code, previous.token_ref()) {
586 break;
587 }
588
589 end = previous;
590 }
591
592 while start != end {
593 if !Self::is_blank(code, start.token_ref()) {
594 break;
595 }
596
597 let next = start.next(code);
598
599 if next == start {
600 break;
601 }
602
603 if !next.is_valid_site(code) {
604 break;
605 }
606
607 start = next;
608 }
609
610 if start == end {
611 let mut site_ref = self.span.start;
612
613 loop {
614 let previous = site_ref.prev(code);
615
616 if previous == site_ref {
617 break;
618 }
619
620 if !previous.is_valid_site(code) {
621 break;
622 }
623
624 if !Self::is_blank(code, previous.token_ref()) {
625 break;
626 }
627
628 site_ref = previous;
629 }
630
631 start = site_ref;
632 end = site_ref;
633 }
634
635 start..end
636 }
637
638 #[inline(always)]
639 fn is_blank(code: &impl SourceCode, token_ref: &TokenRef) -> bool {
640 let Some(string) = token_ref.string(code) else {
641 return false;
642 };
643
644 string
645 .as_bytes()
646 .iter()
647 .all(|&ch| ch == b' ' || ch == b'\t' || ch == b'\r' || ch == b'\n' || ch == b'\x0c')
648 }
649}
650
651/// A globally unique reference of the [syntax error](SyntaxError) in the
652/// syntax tree.
653///
654/// Each [syntax tree's](SyntaxTree) syntax error could be uniquely
655/// addressed within a pair of the [Id] and [Entry], where the identifier
656/// uniquely addresses a specific compilation unit instance (syntax tree), and
657/// the entry part addresses a syntax error within this tree.
658///
659/// Essentially, ErrorRef is a composite index.
660///
661/// Both components of this index form a unique pair
662/// (within the current process), because each compilation unit has a unique
663/// identifier, and the syntax errors within the syntax tree always receive
664/// unique [Entry] indices within the syntax tree.
665///
666/// If the syntax error instance has been removed from the syntax tree
667/// over time, new syntax error within this syntax tree will never occupy
668/// the same ErrorRef object, but the ErrorRef referred to the removed
669/// SyntaxError would become _invalid_.
670///
671/// The [nil](ErrorRef::nil) ErrorRefs are special references that are
672/// considered to be always invalid (they intentionally don't refer
673/// to any syntax error within any syntax tree).
674///
675/// Two distinct instances of the nil ErrorRef are always equal.
676#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
677pub struct ErrorRef {
678 /// An identifier of the syntax tree.
679 pub id: Id,
680
681 /// A versioned index of the [syntax error](SyntaxError) instance
682 /// within the syntax tree.
683 pub entry: Entry,
684}
685
686impl Debug for ErrorRef {
687 #[inline]
688 fn fmt(&self, formatter: &mut Formatter) -> std::fmt::Result {
689 match self.is_nil() {
690 false => formatter.write_fmt(format_args!(
691 "ErrorRef(id: {:?}, entry: {:?})",
692 self.id, self.entry,
693 )),
694 true => formatter.write_str("ErrorRef(Nil)"),
695 }
696 }
697}
698
699impl Identifiable for ErrorRef {
700 #[inline(always)]
701 fn id(&self) -> Id {
702 self.id
703 }
704}
705
706impl ErrorRef {
707 /// Returns an ErrorRef that intentionally does not refer
708 /// to any syntax error within any syntax tree.
709 ///
710 /// If you need just a static reference to the nil ErrorRef, use
711 /// the predefined [NIL_ERROR_REF] static.
712 #[inline(always)]
713 pub const fn nil() -> Self {
714 Self {
715 id: Id::nil(),
716 entry: Entry::nil(),
717 }
718 }
719
720 /// Returns true, if the underlying reference intentionally does not refer
721 /// to any syntax error within any syntax tree.
722 #[inline(always)]
723 pub const fn is_nil(&self) -> bool {
724 self.id.is_nil() || self.entry.is_nil()
725 }
726
727 /// Immutably borrows a syntax tree's syntax error referred to by
728 /// this ErrorRef.
729 ///
730 /// Returns None if this ErrorRef is not valid for specified `tree`.
731 #[inline(always)]
732 pub fn deref<'tree, N: Node>(
733 &self,
734 tree: &'tree impl SyntaxTree<Node = N>,
735 ) -> Option<&'tree SyntaxError> {
736 if self.id != tree.id() {
737 return None;
738 }
739
740 tree.get_error(&self.entry)
741 }
742
743 /// Returns true if the syntax error referred to by this ErrorRef exists in
744 /// the specified `tree`.
745 #[inline(always)]
746 pub fn is_valid_ref(&self, tree: &impl SyntaxTree) -> bool {
747 if self.id != tree.id() {
748 return false;
749 }
750
751 tree.has_error(&self.entry)
752 }
753}