1use std::fmt;
9
10use logos::Logos;
11
12use super::Diagnostic;
13use super::Span;
14use super::lexer::Lexer;
15use super::lexer::LexerResult;
16use super::lexer::TokenSet;
17use super::tree::SyntaxKind;
18
19#[derive(Debug, Clone, PartialEq, Eq)]
24pub enum Event {
25 NodeStarted {
27 kind: SyntaxKind,
29 forward_parent: Option<usize>,
33 },
34
35 NodeFinished,
37
38 Token {
40 kind: SyntaxKind,
42 span: Span,
44 },
45}
46
47impl Event {
48 pub fn abandoned() -> Self {
50 Self::NodeStarted {
51 kind: SyntaxKind::Abandoned,
52 forward_parent: None,
53 }
54 }
55}
56
/// Display helper that renders a list of expected items as an English list.
///
/// Two items render as `a or b`; three or more render as `a, b, or c`.
struct Expected<'a> {
    /// The items to render, in order.
    items: &'a [&'a str],
}

impl<'a> Expected<'a> {
    /// Wraps the given items for display.
    fn new(items: &'a [&'a str]) -> Self {
        Self { items }
    }
}

impl fmt::Display for Expected<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let total = self.items.len();

        for (index, item) in self.items.iter().enumerate() {
            // Pick the separator that goes *before* this item.
            let separator = match index {
                0 => "",
                _ if total == 2 => " or ",
                _ if index + 1 == total => ", or ",
                _ => ", ",
            };

            f.write_str(separator)?;
            f.write_str(item)?;
        }

        Ok(())
    }
}
91
92pub(crate) fn expected_found(expected: &str, found: Option<&str>, span: Span) -> Diagnostic {
94 let found = found.unwrap_or("end of input");
95 Diagnostic::error(format!("expected {expected}, but found {found}"))
96 .with_label(format!("unexpected {found}"), span)
97}
98
99pub(crate) fn expected_one_of(expected: &[&str], found: Option<&str>, span: Span) -> Diagnostic {
101 let found = found.unwrap_or("end of input");
102 Diagnostic::error(format!(
103 "expected {expected}, but found {found}",
104 expected = Expected::new(expected)
105 ))
106 .with_label(format!("unexpected {found}"), span)
107}
108
109pub(crate) fn unterminated_string(span: Span) -> Diagnostic {
111 Diagnostic::error("an unterminated string was encountered")
112 .with_label("this quote is not matched", span)
113}
114
115pub(crate) fn unterminated_heredoc(opening: &str, span: Span, command: bool) -> Diagnostic {
117 Diagnostic::error(format!(
118 "an unterminated {kind} was encountered",
119 kind = if command {
120 "heredoc command"
121 } else {
122 "multi-line string"
123 }
124 ))
125 .with_label(format!("this {opening} is not matched"), span)
126}
127
128pub(crate) fn unterminated_braced_command(opening: &str, span: Span) -> Diagnostic {
130 Diagnostic::error("an unterminated braced command was encountered")
131 .with_label(format!("this {opening} is not matched"), span)
132}
133
134pub(crate) fn unmatched(
136 open: &str,
137 open_span: Span,
138 close: &str,
139 found: &str,
140 span: Span,
141) -> Diagnostic {
142 expected_found(close, Some(found), span)
143 .with_label(format!("this {open} is not matched"), open_span)
144}
145
146pub trait ParserToken<'a>: Eq + Copy + Logos<'a, Source = str, Error = (), Extras = ()> {
148 fn into_syntax(self) -> SyntaxKind;
150
151 fn into_raw(self) -> u8;
153
154 fn from_raw(token: u8) -> Self;
156
157 fn describe(self) -> &'static str;
159
160 fn is_trivia(self) -> bool;
165
166 #[allow(unused_variables)]
168 fn recover_interpolation(self, start: Span, parser: &mut Parser<'a, Self>) -> bool {
169 false
170 }
171}
172
/// A handle to a node that has been started but not yet finished.
///
/// The wrapped value is the index of the node's `NodeStarted` event in the
/// parser's event list. A marker must be consumed with `complete` or
/// `abandon`; dropping it otherwise panics.
#[derive(Debug)]
pub struct Marker(usize);
181
182impl Marker {
183 fn new(pos: usize) -> Marker {
185 Self(pos)
186 }
187
188 pub fn complete<'a, T>(self, parser: &mut Parser<'a, T>, kind: SyntaxKind) -> CompletedMarker
190 where
191 T: ParserToken<'a>,
192 {
193 match &mut parser.events[self.0] {
195 Event::NodeStarted { kind: existing, .. } => {
196 *existing = kind;
197 }
198 _ => unreachable!(),
199 }
200
201 parser.events.push(Event::NodeFinished);
202 let m = CompletedMarker::new(self.0, kind);
203 std::mem::forget(self);
204 m
205 }
206
207 pub fn abandon<'a, T>(self, parser: &mut Parser<'a, T>)
209 where
210 T: ParserToken<'a>,
211 {
212 if self.0 == parser.events.len() - 1 {
214 match parser.events.pop() {
215 Some(Event::NodeStarted {
216 kind: SyntaxKind::Abandoned,
217 forward_parent: None,
218 }) => (),
219 _ => unreachable!(),
220 }
221 }
222
223 std::mem::forget(self);
224 }
225}
226
227impl Drop for Marker {
228 fn drop(&mut self) {
229 if !std::thread::panicking() {
230 panic!("marker was dropped without it being completed or abandoned");
231 }
232 }
233}
234
235#[derive(Debug, Clone, Copy)]
237pub struct CompletedMarker {
238 pos: usize,
240 kind: SyntaxKind,
242}
243
244impl CompletedMarker {
245 fn new(pos: usize, kind: SyntaxKind) -> Self {
248 CompletedMarker { pos, kind }
249 }
250
251 pub fn precede<'a, T>(self, parser: &mut Parser<'a, T>) -> Marker
253 where
254 T: ParserToken<'a>,
255 {
256 let new_pos = parser.start();
257 match &mut parser.events[self.pos] {
258 Event::NodeStarted { forward_parent, .. } => {
259 *forward_parent = Some(new_pos.0 - self.pos);
260 }
261 _ => unreachable!(),
262 }
263 new_pos
264 }
265
266 pub fn extend_to<'a, T>(self, parser: &mut Parser<'a, T>, marker: Marker) -> CompletedMarker
268 where
269 T: ParserToken<'a>,
270 {
271 let pos = marker.0;
272 std::mem::forget(marker);
273 match &mut parser.events[pos] {
274 Event::NodeStarted { forward_parent, .. } => {
275 *forward_parent = Some(self.pos - pos);
276 }
277 _ => unreachable!(),
278 }
279 self
280 }
281
282 pub fn kind(&self) -> SyntaxKind {
284 self.kind
285 }
286}
287
288#[allow(missing_debug_implementations)]
292pub struct Interpolator<'a, T>
293where
294 T: Logos<'a, Extras = ()>,
295{
296 lexer: Lexer<'a, T>,
298 events: Vec<Event>,
300 recovery: Vec<TokenSet>,
302 diagnostics: Vec<Diagnostic>,
304 buffered: Vec<Event>,
306}
307
308impl<'a, T> Interpolator<'a, T>
309where
310 T: Logos<'a, Source = str, Error = (), Extras = ()> + Copy,
311{
312 pub fn event(&mut self, event: Event) {
314 self.events.push(event);
315 }
316
317 pub fn diagnostic(&mut self, diagnostic: Diagnostic) {
319 self.diagnostics.push(diagnostic);
320 }
321
322 pub fn start(&mut self) -> Marker {
324 if !self.buffered.is_empty() {
326 self.events.append(&mut self.buffered);
327 }
328
329 let pos = self.events.len();
330 self.events.push(Event::NodeStarted {
331 kind: SyntaxKind::Abandoned,
332 forward_parent: None,
333 });
334 Marker::new(pos)
335 }
336
337 pub fn span(&self) -> Span {
339 self.lexer.span()
340 }
341
342 pub fn into_parser<T2>(self) -> Parser<'a, T2>
344 where
345 T2: ParserToken<'a>,
346 T::Extras: Into<T2::Extras>,
347 {
348 Parser {
349 lexer: Some(self.lexer.morph()),
350 events: self.events,
351 recovery: self.recovery,
352 diagnostics: self.diagnostics,
353 buffered: Default::default(),
354 }
355 }
356}
357
358impl<'a, T> Iterator for Interpolator<'a, T>
359where
360 T: Logos<'a, Error = (), Extras = ()> + Copy,
361{
362 type Item = (LexerResult<T>, Span);
363
364 fn next(&mut self) -> Option<Self::Item> {
365 self.lexer.next()
366 }
367}
368
369#[allow(missing_debug_implementations)]
371pub struct Output<'a, T>
372where
373 T: ParserToken<'a>,
374{
375 pub lexer: Lexer<'a, T>,
377 pub events: Vec<Event>,
379 pub diagnostics: Vec<Diagnostic>,
381}
382
383#[derive(Debug, Copy, Clone)]
387pub struct Peek2<T> {
388 pub first: (T, Span),
390 pub second: (T, Span),
392}
393
394#[allow(missing_debug_implementations)]
399pub struct Parser<'a, T>
400where
401 T: ParserToken<'a>,
402{
403 lexer: Option<Lexer<'a, T>>,
409 events: Vec<Event>,
411 recovery: Vec<TokenSet>,
413 diagnostics: Vec<Diagnostic>,
415 buffered: Vec<Event>,
417}
418
419impl<'a, T> Parser<'a, T>
420where
421 T: ParserToken<'a>,
422{
423 pub fn new(lexer: Lexer<'a, T>) -> Self {
425 Self {
426 lexer: Some(lexer),
427 events: Default::default(),
428 recovery: Default::default(),
429 diagnostics: Default::default(),
430 buffered: Default::default(),
431 }
432 }
433
434 pub fn span(&self) -> Span {
436 self.lexer.as_ref().expect("expected a lexer").span()
437 }
438
439 pub fn source(&self, span: Span) -> &'a str {
441 self.lexer.as_ref().expect("expected a lexer").source(span)
442 }
443
444 pub fn peek(&mut self) -> Option<(T, Span)> {
457 while let Some((res, span)) = self.lexer.as_mut()?.peek() {
458 if let Some(t) = self.consume_trivia(res, span, true) {
459 return Some(t);
460 }
461 }
462
463 None
464 }
465
466 pub fn peek2(&mut self) -> Option<Peek2<T>> {
471 let first = self.peek()?;
472
473 let mut lexer = self
477 .lexer
478 .as_ref()
479 .expect("there should be a lexer")
480 .clone();
481 lexer
482 .next()
483 .unwrap()
484 .0
485 .expect("should have peeked at a valid token");
486 while let Some((Ok(token), span)) = lexer.next() {
487 if token.is_trivia() {
488 continue;
490 }
491
492 return Some(Peek2 {
493 first,
494 second: (token, span),
495 });
496 }
497
498 None
499 }
500
501 pub fn next_if(&mut self, token: T) -> bool {
505 match self.peek() {
506 Some((t, _)) if t == token => {
507 self.next();
508 true
509 }
510 _ => false,
511 }
512 }
513
514 pub fn matching<F>(
519 &mut self,
520 open: T,
521 close: T,
522 allow_empty: bool,
523 cb: F,
524 ) -> Result<(), Diagnostic>
525 where
526 F: FnOnce(&mut Self, Span) -> Result<(), Diagnostic>,
527 {
528 let open_span = self.expect(open)?;
529
530 if allow_empty {
532 match self.peek() {
533 Some((t, _)) if t == close => {
534 self.next();
535 return Ok(());
536 }
537 _ => {}
538 }
539 }
540
541 cb(self, open_span)?;
542
543 match self.next() {
544 Some((token, _)) if token == close => Ok(()),
545 found => {
546 let (found, span) = found
547 .map(|(t, s)| (t.describe(), s))
548 .unwrap_or_else(|| ("end of input", self.span()));
549
550 Err(unmatched(
551 open.describe(),
552 open_span,
553 close.describe(),
554 found,
555 span,
556 ))
557 }
558 }
559 }
560
561 pub fn matching_delimited<F>(
569 &mut self,
570 open: T,
571 close: T,
572 delimiter: Option<T>,
573 recovery: TokenSet,
574 cb: F,
575 ) -> Result<(), Diagnostic>
576 where
577 F: FnMut(&mut Self, Marker) -> Result<(), (Marker, Diagnostic)>,
578 {
579 let open_span = self.expect(open)?;
580 self.delimited(close, delimiter, recovery, cb);
581 self.consume_close_token(open, open_span, close);
582 Ok(())
583 }
584
585 pub fn consume_close_token(&mut self, open: T, open_span: Span, close: T) {
590 if self.next_if(close) {
591 return;
592 }
593
594 let (found, span) = self
595 .peek()
596 .map(|(t, s)| (t.describe(), s))
597 .unwrap_or_else(|| ("end of input", self.span()));
598
599 self.diagnostic(unmatched(
600 open.describe(),
601 open_span,
602 close.describe(),
603 found,
604 span,
605 ));
606
607 self.events.push(Event::Token {
609 kind: close.into_syntax(),
610 span: Span::new(span.start(), 0),
611 });
612 }
613
614 pub fn delimited<F>(&mut self, until: T, delimiter: Option<T>, recovery: TokenSet, mut cb: F)
621 where
622 F: FnMut(&mut Self, Marker) -> Result<(), (Marker, Diagnostic)>,
623 {
624 let recovery = if let Some(delimiter) = delimiter {
625 recovery.union(TokenSet::new(&[until.into_raw(), delimiter.into_raw()]))
626 } else {
627 recovery.union(TokenSet::new(&[until.into_raw()]))
628 };
629
630 let parent = self.recovery.last().copied();
631 self.recovery.push(recovery);
632
633 let mut next: Option<(T, Span)> = self.peek();
634 while let Some((token, _)) = next {
635 if token == until {
636 break;
637 }
638
639 let mut lexer = self.lexer.clone();
640 let marker = self.start();
641 if let Err((marker, e)) = cb(self, marker) {
642 if let Some((Ok(token), _)) = lexer.as_mut().expect("should have a lexer").peek()
643 && !recovery.contains(token.into_raw())
644 {
645 if let Some(parent) = &parent
648 && parent.contains(token.into_raw())
649 {
650 self.events.truncate(marker.0);
652 marker.abandon(self);
653
654 self.buffered.clear();
656 self.lexer = lexer;
657 break;
658 }
659 }
660
661 self.recover(e);
662 marker.abandon(self);
663 }
664
665 next = self.peek();
666
667 if let Some(delimiter) = delimiter
668 && let Some((token, _)) = next
669 {
670 if token == until {
671 break;
672 }
673
674 if let Err(e) = self.expect(delimiter) {
675 let e = if let Some(span) = self.events.iter().rev().find_map(|e| match e {
679 Event::Token { kind, span }
680 if *kind != SyntaxKind::Whitespace && *kind != SyntaxKind::Comment =>
681 {
682 Some(*span)
683 }
684 _ => None,
685 }) {
686 e.with_label(
687 format!(
688 "consider adding a {desc} after this",
689 desc = delimiter.describe()
690 ),
691 Span::new(span.end() - 1, 1),
692 )
693 } else {
694 e
695 };
696
697 self.recover(e);
698 self.next_if(delimiter);
699 }
700
701 next = self.peek();
702 }
703 }
704
705 self.recovery.pop();
706 }
707
708 pub fn diagnostic(&mut self, diagnostic: Diagnostic) {
710 self.diagnostics.push(diagnostic);
711 }
712
713 pub fn push_recovery_set(&mut self, tokens: TokenSet) {
715 self.recovery.push(tokens);
716 }
717
718 pub fn pop_recovery_set(&mut self) {
724 self.recovery.pop().expect("should pop");
725 }
726
727 pub fn recover(&mut self, mut diagnostic: Diagnostic) {
734 let tokens = *self.recovery.last().expect("expected a top recovery set");
735
736 while let Some((token, span)) = self.peek() {
737 if tokens.contains(token.into_raw()) {
738 break;
739 }
740
741 self.next().unwrap();
742
743 if T::recover_interpolation(token, span, self) {
747 for label in diagnostic.labels_mut() {
750 let label_span = label.span();
751 if label_span.start() != span.start() {
752 continue;
753 }
754
755 label.set_span(Span::new(
757 label_span.start(),
758 self.lexer
759 .as_ref()
760 .expect("should have a lexer")
761 .span()
762 .end()
763 - label_span.end()
764 + 1,
765 ));
766 }
767 }
768 }
769
770 self.diagnostics.push(diagnostic);
771 }
772
773 pub fn recover_with_set(&mut self, diagnostic: Diagnostic, recovery: TokenSet) {
775 self.recovery.push(recovery);
776 self.recover(diagnostic);
777 self.recovery.pop();
778 }
779
780 pub fn start(&mut self) -> Marker {
782 if !self.events.is_empty() {
785 self.peek();
786
787 if !self.buffered.is_empty() {
789 self.events.append(&mut self.buffered);
790 }
791 }
792
793 let pos = self.events.len();
794 self.events.push(Event::NodeStarted {
795 kind: SyntaxKind::Abandoned,
796 forward_parent: None,
797 });
798 Marker::new(pos)
799 }
800
801 pub fn require(&mut self, token: T) -> Span {
805 match self.next() {
806 Some((t, span)) if t == token => span,
807 _ => panic!(
808 "lexer not at required token {token}",
809 token = token.describe()
810 ),
811 }
812 }
813
814 pub fn require_in(&mut self, tokens: TokenSet) {
820 match self.next() {
821 Some((t, _)) if tokens.contains(t.into_raw()) => {}
822 found => {
823 let found = found.map(|(t, _)| t.describe());
824 panic!(
825 "unexpected token {found}",
826 found = found.unwrap_or("end of input")
827 );
828 }
829 }
830 }
831
832 pub fn expect(&mut self, token: T) -> Result<Span, Diagnostic> {
836 match self.peek() {
837 Some((t, span)) if t == token => {
838 self.next();
839 Ok(span)
840 }
841 found => {
842 let (found, span) = found
843 .map(|(t, s)| (Some(t.describe()), s))
844 .unwrap_or_else(|| (None, self.span()));
845 Err(expected_found(token.describe(), found, span))
846 }
847 }
848 }
849
850 pub fn expect_with_name(&mut self, token: T, name: &'static str) -> Result<Span, Diagnostic> {
855 match self.peek() {
856 Some((t, span)) if t == token => {
857 self.next();
858 Ok(span)
859 }
860 found => {
861 let (found, span) = found
862 .map(|(t, s)| (Some(t.describe()), s))
863 .unwrap_or_else(|| (None, self.span()));
864 Err(expected_found(name, found, span))
865 }
866 }
867 }
868
869 pub fn expect_in(
873 &mut self,
874 tokens: TokenSet,
875 expected: &[&str],
876 ) -> Result<(T, Span), Diagnostic> {
877 match self.peek() {
878 Some((t, span)) if tokens.contains(t.into_raw()) => {
879 self.next();
880 Ok((t, span))
881 }
882 found => {
883 let (found, span) = found
884 .map(|(t, s)| (Some(t.describe()), s))
885 .unwrap_or_else(|| (None, self.span()));
886
887 Err(expected_one_of(expected, found, span))
888 }
889 }
890 }
891
892 pub fn interpolate<T2, F, R>(&mut self, cb: F) -> R
899 where
900 T2: Logos<'a, Source = str, Error = (), Extras = ()> + Copy,
901 F: FnOnce(Interpolator<'a, T2>) -> (Parser<'a, T>, R),
902 {
903 let input = Interpolator {
904 lexer: std::mem::take(&mut self.lexer)
905 .expect("lexer should exist")
906 .morph(),
907 recovery: std::mem::take(&mut self.recovery),
908 events: std::mem::take(&mut self.events),
909 diagnostics: std::mem::take(&mut self.diagnostics),
910 buffered: std::mem::take(&mut self.buffered),
911 };
912 let (p, result) = cb(input);
913 *self = p;
914 result
915 }
916
917 pub fn morph<T2>(self) -> Parser<'a, T2>
922 where
923 T2: ParserToken<'a>,
924 T::Extras: Into<T2::Extras>,
925 {
926 Parser {
927 lexer: self.lexer.map(|l| l.morph()),
928 events: self.events,
929 recovery: self.recovery,
930 diagnostics: self.diagnostics,
931 buffered: self.buffered,
932 }
933 }
934
935 pub fn into_interpolator<T2>(self) -> Interpolator<'a, T2>
937 where
938 T2: Logos<'a, Source = str, Error = (), Extras = ()> + Copy,
939 {
940 Interpolator {
941 lexer: self.lexer.expect("lexer should be present").morph(),
942 events: self.events,
943 recovery: self.recovery,
944 diagnostics: self.diagnostics,
945 buffered: self.buffered,
946 }
947 }
948
949 pub fn finish(self) -> Output<'a, T> {
958 assert!(
959 self.buffered.is_empty(),
960 "buffered events remain; ensure `next` was called after an unsuccessful peek"
961 );
962
963 Output {
964 lexer: self.lexer.expect("lexer should be present"),
965 events: self.events,
966 diagnostics: self.diagnostics,
967 }
968 }
969
970 pub fn update_last_token_kind(&mut self, new_kind: SyntaxKind) {
976 let last = self.events.last_mut().expect("expected a last event");
977 match last {
978 Event::Token { kind, .. } => *kind = new_kind,
979 _ => panic!("the last event is not a token"),
980 }
981 }
982
983 pub fn consume_remainder(&mut self) {
989 if !self.buffered.is_empty() {
990 self.events.append(&mut self.buffered);
991 }
992
993 if let Some(span) = self
994 .lexer
995 .as_mut()
996 .expect("there should be a lexer")
997 .consume_remainder()
998 {
999 self.events.push(Event::Token {
1000 kind: SyntaxKind::Unparsed,
1001 span,
1002 });
1003 }
1004 }
1005
1006 fn consume_trivia(
1008 &mut self,
1009 res: LexerResult<T>,
1010 span: Span,
1011 peeked: bool,
1012 ) -> Option<(T, Span)> {
1013 if !peeked && !self.buffered.is_empty() {
1015 self.events.append(&mut self.buffered);
1016 }
1017
1018 let event = match res {
1019 Ok(token) => {
1020 if !token.is_trivia() {
1021 return Some((token, span));
1022 }
1023
1024 Event::Token {
1025 kind: token.into_syntax(),
1026 span,
1027 }
1028 }
1029 Err(_) => {
1030 self.diagnostic(
1031 Diagnostic::error("an unknown token was encountered")
1032 .with_label(Self::unsupported_token_text(self.source(span)), span),
1033 );
1034 Event::Token {
1035 kind: SyntaxKind::Unknown,
1036 span,
1037 }
1038 }
1039 };
1040
1041 if peeked {
1042 self.lexer.as_mut().expect("should have a lexer").next();
1043 self.buffered.push(event);
1044 } else {
1045 self.events.push(event);
1046 }
1047 None
1048 }
1049
/// Chooses the label text for an unknown token, suggesting the doubled
/// operator for a lone `&` or `|`.
fn unsupported_token_text(token: &str) -> &'static str {
    if token == "&" {
        "did you mean to use `&&` here?"
    } else if token == "|" {
        "did you mean to use `||` here?"
    } else {
        "this is not a supported WDL token"
    }
}
1058}
1059
1060impl<'a, T> Iterator for Parser<'a, T>
1061where
1062 T: ParserToken<'a>,
1063{
1064 type Item = (T, Span);
1065
1066 fn next(&mut self) -> Option<(T, Span)> {
1067 while let Some((res, span)) = self.lexer.as_mut()?.next() {
1068 if let Some((token, span)) = self.consume_trivia(res, span, false) {
1069 self.events.push(Event::Token {
1070 kind: token.into_syntax(),
1071 span,
1072 });
1073 return Some((token, span));
1074 }
1075 }
1076
1077 if !self.buffered.is_empty() {
1078 self.events.append(&mut self.buffered);
1079 }
1080
1081 None
1082 }
1083}