// wdl_grammar/parser.rs

//! Module for the parser implementation.
//!
//! The parser consumes a token stream from a lexer and produces
//! a list of parser events that can be used to construct a CST.
//!
//! The design of this is very much based on `rust-analyzer`.

use std::fmt;

use logos::Logos;

use super::Diagnostic;
use super::Span;
use super::lexer::Lexer;
use super::lexer::LexerResult;
use super::lexer::TokenSet;
use super::tree::SyntaxKind;

/// Represents an event produced by the parser.
///
/// The parser produces a stream of events that can be used to construct
/// a CST.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Event {
    /// A new node has started.
    NodeStarted {
        /// The kind of the node.
        kind: SyntaxKind,
        /// For left-recursive syntactic constructs, the parser produces
        /// a child node before it sees a parent. `forward_parent`
        /// saves the position of the current event's parent.
        forward_parent: Option<usize>,
    },

    /// A node has finished.
    NodeFinished,

    /// A token was encountered.
    Token {
        /// The syntax kind of the token.
        kind: SyntaxKind,
        /// The source span of the token.
        span: Span,
    },
}
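
// Illustrative note (not from the original sources): a tree builder replays
// these events in order. For a hypothetical input `1+2` parsed with
// `CompletedMarker::precede`, the stream could look roughly like this, where
// the syntax kind names are placeholders:
//
//   [0] NodeStarted { kind: LiteralExpr, forward_parent: Some(3) }
//   [1] Token { kind: Integer, .. }
//   [2] NodeFinished
//   [3] NodeStarted { kind: AdditionExpr, forward_parent: None }
//   [4] Token { kind: Plus, .. }
//   ... events for the right-hand literal ...
//   NodeFinished
//
// The `forward_parent` offset (3 - 0) tells the builder to open the addition
// node before replaying the literal node, even though the literal's events
// were produced first.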

impl Event {
    /// Gets a start node event for an abandoned node.
    pub fn abandoned() -> Self {
        Self::NodeStarted {
            kind: SyntaxKind::Abandoned,
            forward_parent: None,
        }
    }
}

/// Utility type for displaying "expected" items in a parser expectation
/// diagnostic.
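///
/// For example, the items `["a", "b", "c"]` render as `a, b, or c`, and
/// `["a", "b"]` renders as `a or b`.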
struct Expected<'a> {
    /// The set of expected items.
    items: &'a [&'a str],
}

impl<'a> Expected<'a> {
    /// Constructs a new `Expected`.
    fn new(items: &'a [&'a str]) -> Self {
        Self { items }
    }
}

impl fmt::Display for Expected<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let count = self.items.len();
        for (i, item) in self.items.iter().enumerate() {
            if i > 0 {
                if count == 2 {
                    write!(f, " or ")?;
                } else if i == count - 1 {
                    write!(f, ", or ")?;
                } else {
                    write!(f, ", ")?;
                }
            }

            write!(f, "{item}")?;
        }

        Ok(())
    }
}

/// Creates an "expected, but found" diagnostic error.
pub(crate) fn expected_found(expected: &str, found: Option<&str>, span: Span) -> Diagnostic {
    let found = found.unwrap_or("end of input");
    Diagnostic::error(format!("expected {expected}, but found {found}"))
        .with_label(format!("unexpected {found}"), span)
}

/// Creates an "expected one of, but found" diagnostic error.
pub(crate) fn expected_one_of(expected: &[&str], found: Option<&str>, span: Span) -> Diagnostic {
    let found = found.unwrap_or("end of input");
    Diagnostic::error(format!(
        "expected {expected}, but found {found}",
        expected = Expected::new(expected)
    ))
    .with_label(format!("unexpected {found}"), span)
}

/// Creates an "unterminated string" diagnostic error.
pub(crate) fn unterminated_string(span: Span) -> Diagnostic {
    Diagnostic::error("an unterminated string was encountered")
        .with_label("this quote is not matched", span)
}

/// Creates an "unterminated heredoc" diagnostic error.
pub(crate) fn unterminated_heredoc(opening: &str, span: Span, command: bool) -> Diagnostic {
    Diagnostic::error(format!(
        "an unterminated {kind} was encountered",
        kind = if command {
            "heredoc command"
        } else {
            "multi-line string"
        }
    ))
    .with_label(format!("this {opening} is not matched"), span)
}

/// Creates an "unterminated braced command" diagnostic error.
pub(crate) fn unterminated_braced_command(opening: &str, span: Span) -> Diagnostic {
    Diagnostic::error("an unterminated braced command was encountered")
        .with_label(format!("this {opening} is not matched"), span)
}

/// Creates an "unmatched token" diagnostic error.
pub(crate) fn unmatched(
    open: &str,
    open_span: Span,
    close: &str,
    found: &str,
    span: Span,
) -> Diagnostic {
    expected_found(close, Some(found), span)
        .with_label(format!("this {open} is not matched"), open_span)
}

/// A trait implemented by parser tokens.
pub trait ParserToken<'a>: Eq + Copy + Logos<'a, Source = str, Error = (), Extras = ()> {
    /// Converts the token into its syntax representation.
    fn into_syntax(self) -> SyntaxKind;

    /// Converts the token into its "raw" representation.
    fn into_raw(self) -> u8;

    /// Converts from a raw token into the parser token.
    fn from_raw(token: u8) -> Self;

    /// Describes the token.
    fn describe(self) -> &'static str;

    /// Determines if the token is trivia that should be skipped over
    /// by the parser.
    ///
    /// Trivia tokens are still added to the concrete syntax tree.
    fn is_trivia(self) -> bool;

    /// A helper for recovering at an interpolation point.
    #[allow(unused_variables)]
    fn recover_interpolation(self, start: Span, parser: &mut Parser<'a, Self>) -> bool {
        false
    }
}

/// Marks the start of a node in the event list.
///
/// # Panics
///
/// Markers must either be completed or abandoned before being dropped;
/// otherwise, a panic will occur.
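///
/// # Examples
///
/// A minimal usage sketch (illustrative only; `parse_contents` and `kind`
/// are hypothetical):
///
/// ```ignore
/// let marker = parser.start();
/// match parse_contents(parser) {
///     Ok(()) => {
///         // Keep the node and assign its final syntax kind.
///         marker.complete(parser, kind);
///     }
///     Err(e) => {
///         // Recover from the error and discard the node.
///         parser.recover(e);
///         marker.abandon(parser);
///     }
/// }
/// ```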
#[derive(Debug)]
pub struct Marker(usize);

impl Marker {
    /// Constructs a new `Marker`.
    fn new(pos: usize) -> Marker {
        Self(pos)
    }

    /// Completes the syntax tree node.
    pub fn complete<'a, T>(self, parser: &mut Parser<'a, T>, kind: SyntaxKind) -> CompletedMarker
    where
        T: ParserToken<'a>,
    {
        // Update the node kind and push a finished event
        match &mut parser.events[self.0] {
            Event::NodeStarted { kind: existing, .. } => {
                *existing = kind;
            }
            _ => unreachable!(),
        }

        parser.events.push(Event::NodeFinished);
        let m = CompletedMarker::new(self.0, kind);
        std::mem::forget(self);
        m
    }

    /// Abandons the node due to an error.
    pub fn abandon<'a, T>(self, parser: &mut Parser<'a, T>)
    where
        T: ParserToken<'a>,
    {
        // If the current node has no children, just pop it from the event list
        if self.0 == parser.events.len() - 1 {
            match parser.events.pop() {
                Some(Event::NodeStarted {
                    kind: SyntaxKind::Abandoned,
                    forward_parent: None,
                }) => (),
                _ => unreachable!(),
            }
        }

        std::mem::forget(self);
    }
}

impl Drop for Marker {
    fn drop(&mut self) {
        if !std::thread::panicking() {
            panic!("marker was dropped without it being completed or abandoned");
        }
    }
}

/// Represents a marker for a node that has been completed.
#[derive(Debug, Clone, Copy)]
pub struct CompletedMarker {
    /// Marks the position in the event list where the node was started.
    pos: usize,
    /// The kind of the completed node.
    kind: SyntaxKind,
}

impl CompletedMarker {
    /// Constructs a new completed marker with the given start position and
    /// syntax kind.
    fn new(pos: usize, kind: SyntaxKind) -> Self {
        CompletedMarker { pos, kind }
    }

    /// Creates a new node that precedes the completed node.
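    ///
    /// A sketch of left-recursive parsing with `precede` (the helper and the
    /// node kind are hypothetical):
    ///
    /// ```ignore
    /// // After parsing the left-hand side of `lhs + rhs`:
    /// let lhs = parse_primary(parser)?;
    /// let marker = lhs.precede(parser); // this node will become the parent
    /// parser.next();                    // consume the operator token
    /// parse_primary(parser)?;           // parse the right-hand side
    /// marker.complete(parser, addition_expr_kind);
    /// ```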
    pub fn precede<'a, T>(self, parser: &mut Parser<'a, T>) -> Marker
    where
        T: ParserToken<'a>,
    {
        let new_pos = parser.start();
        match &mut parser.events[self.pos] {
            Event::NodeStarted { forward_parent, .. } => {
                *forward_parent = Some(new_pos.0 - self.pos);
            }
            _ => unreachable!(),
        }
        new_pos
    }

    /// Extends the completed marker to the left up to `marker`.
    pub fn extend_to<'a, T>(self, parser: &mut Parser<'a, T>, marker: Marker) -> CompletedMarker
    where
        T: ParserToken<'a>,
    {
        let pos = marker.0;
        std::mem::forget(marker);
        match &mut parser.events[pos] {
            Event::NodeStarted { forward_parent, .. } => {
                *forward_parent = Some(self.pos - pos);
            }
            _ => unreachable!(),
        }
        self
    }

    /// Gets the kind of the completed marker.
    pub fn kind(&self) -> SyntaxKind {
        self.kind
    }
}

/// A utility type used during string interpolation.
///
/// See the [Parser::interpolate] method.
#[allow(missing_debug_implementations)]
pub struct Interpolator<'a, T>
where
    T: Logos<'a, Extras = ()>,
{
    /// The lexer to use for the interpolation.
    lexer: Lexer<'a, T>,
    /// The parser events.
    events: Vec<Event>,
    /// The recovery token set stack.
    recovery: Vec<TokenSet>,
    /// The parser diagnostics.
    diagnostics: Vec<Diagnostic>,
    /// The buffered events from a peek operation.
    buffered: Vec<Event>,
}

impl<'a, T> Interpolator<'a, T>
where
    T: Logos<'a, Source = str, Error = (), Extras = ()> + Copy,
{
    /// Adds an event to the parser event list.
    pub fn event(&mut self, event: Event) {
        self.events.push(event);
    }

    /// Adds a diagnostic to the parser error list.
    pub fn diagnostic(&mut self, diagnostic: Diagnostic) {
        self.diagnostics.push(diagnostic);
    }

    /// Starts a new node event.
    pub fn start(&mut self) -> Marker {
        // Append any buffered trivia before we start this node
        if !self.buffered.is_empty() {
            self.events.append(&mut self.buffered);
        }

        let pos = self.events.len();
        self.events.push(Event::NodeStarted {
            kind: SyntaxKind::Abandoned,
            forward_parent: None,
        });
        Marker::new(pos)
    }

    /// Gets the current span of the interpolator.
    pub fn span(&self) -> Span {
        self.lexer.span()
    }

    /// Consumes the interpolator and returns a parser.
    pub fn into_parser<T2>(self) -> Parser<'a, T2>
    where
        T2: ParserToken<'a>,
        T::Extras: Into<T2::Extras>,
    {
        Parser {
            lexer: Some(self.lexer.morph()),
            events: self.events,
            recovery: self.recovery,
            diagnostics: self.diagnostics,
            buffered: Default::default(),
        }
    }
}

impl<'a, T> Iterator for Interpolator<'a, T>
where
    T: Logos<'a, Error = (), Extras = ()> + Copy,
{
    type Item = (LexerResult<T>, Span);

    fn next(&mut self) -> Option<Self::Item> {
        self.lexer.next()
    }
}

/// The output of a parse.
#[allow(missing_debug_implementations)]
pub struct Output<'a, T>
where
    T: ParserToken<'a>,
{
    /// The parser's lexer.
    pub lexer: Lexer<'a, T>,
    /// The parser events.
    pub events: Vec<Event>,
    /// The parser diagnostics.
    pub diagnostics: Vec<Diagnostic>,
}

/// Represents the result of a `peek2` operation.
///
/// See [Parser::peek2].
#[derive(Debug, Copy, Clone)]
pub struct Peek2<T> {
    /// The first peeked token.
    pub first: (T, Span),
    /// The second peeked token.
    pub second: (T, Span),
}

/// Implements a WDL parser.
///
/// The parser produces a list of events that can be used to
/// construct a CST.
#[allow(missing_debug_implementations)]
pub struct Parser<'a, T>
where
    T: ParserToken<'a>,
{
    /// The lexer that returns a stream of tokens for the parser.
    ///
    /// This may temporarily be `None` during string interpolation.
    ///
    /// See the [interpolate][Self::interpolate] method.
    lexer: Option<Lexer<'a, T>>,
    /// The events produced by the parser.
    events: Vec<Event>,
    /// The recovery token set stack.
    recovery: Vec<TokenSet>,
    /// The diagnostics encountered so far.
    diagnostics: Vec<Diagnostic>,
    /// The buffered events from a peek operation.
    buffered: Vec<Event>,
}

impl<'a, T> Parser<'a, T>
where
    T: ParserToken<'a>,
{
    /// Constructs a new parser from the given lexer.
    pub fn new(lexer: Lexer<'a, T>) -> Self {
        Self {
            lexer: Some(lexer),
            events: Default::default(),
            recovery: Default::default(),
            diagnostics: Default::default(),
            buffered: Default::default(),
        }
    }

    /// Gets the current span of the parser.
    pub fn span(&self) -> Span {
        self.lexer.as_ref().expect("expected a lexer").span()
    }

    /// Gets the source being parsed at the given span.
    pub fn source(&self, span: Span) -> &'a str {
        self.lexer.as_ref().expect("expected a lexer").source(span)
    }

    /// Peeks at the next token (i.e. lookahead 1) from the lexer without
    /// consuming it.
    ///
    /// The token is not added to the event list.
    ///
    /// # Note
    ///
    /// Peeking may cause parser events to be buffered.
    ///
    /// If `peek` returns `None`, ensure all buffered events are added to the
    /// event list by calling `next` on the parser; otherwise, calling `finish`
    /// may panic.
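    ///
    /// A sketch of that idiom (illustrative only):
    ///
    /// ```ignore
    /// if parser.peek().is_none() {
    ///     // Flush any trivia buffered by the unsuccessful peek.
    ///     let _ = parser.next();
    /// }
    /// let output = parser.finish();
    /// ```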
    pub fn peek(&mut self) -> Option<(T, Span)> {
        while let Some((res, span)) = self.lexer.as_mut()?.peek() {
            if let Some(t) = self.consume_trivia(res, span, true) {
                return Some(t);
            }
        }

        None
    }

    /// Peeks at the next and next-next tokens (i.e. lookahead 2) from the lexer
    /// without consuming either token.
    ///
    /// The returned tokens are not added to the event list.
    pub fn peek2(&mut self) -> Option<Peek2<T>> {
        let first = self.peek()?;

        // We have to clone the lexer here since it only supports a single lookahead.
        // The clone is cheap, but it does mean we'll re-tokenize this second lookahead
        // eventually.
        let mut lexer = self
            .lexer
            .as_ref()
            .expect("there should be a lexer")
            .clone();
        lexer
            .next()
            .unwrap()
            .0
            .expect("should have peeked at a valid token");
        while let Some((Ok(token), span)) = lexer.next() {
            if token.is_trivia() {
                // Ignore trivia
                continue;
            }

            return Some(Peek2 {
                first,
                second: (token, span),
            });
        }

        None
    }

    /// Consumes the next token only if it matches the given token.
    ///
    /// Returns `true` if the token was consumed, `false` otherwise.
    pub fn next_if(&mut self, token: T) -> bool {
        match self.peek() {
            Some((t, _)) if t == token => {
                self.next();
                true
            }
            _ => false,
        }
    }

    /// Parses a matching token pair that surrounds an item.
    ///
    /// This method parses the open token, calls the callback to parse the item,
    /// and then parses the close token.
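    ///
    /// A usage sketch (the token names and `parse_expr` are hypothetical):
    ///
    /// ```ignore
    /// parser.matching(Token::OpenParen, Token::CloseParen, false, |parser, _open_span| {
    ///     parse_expr(parser)
    /// })?;
    /// ```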
    pub fn matching<F>(
        &mut self,
        open: T,
        close: T,
        allow_empty: bool,
        cb: F,
    ) -> Result<(), Diagnostic>
    where
        F: FnOnce(&mut Self, Span) -> Result<(), Diagnostic>,
    {
        let open_span = self.expect(open)?;

        // Check to see if the close token immediately follows the open token
        if allow_empty {
            match self.peek() {
                Some((t, _)) if t == close => {
                    self.next();
                    return Ok(());
                }
                _ => {}
            }
        }

        cb(self, open_span)?;

        match self.next() {
            Some((token, _)) if token == close => Ok(()),
            found => {
                let (found, span) = found
                    .map(|(t, s)| (t.describe(), s))
                    .unwrap_or_else(|| ("end of input", self.span()));

                Err(unmatched(
                    open.describe(),
                    open_span,
                    close.describe(),
                    found,
                    span,
                ))
            }
        }
    }

    /// Parses a matching token pair that surrounds a delimited list of items.
    ///
    /// This method parses the open token, calls the callback for each delimited
    /// item, and then parses the close token.
    ///
    /// The provided recovery token set is used to recover within the delimited
    /// item list.
    pub fn matching_delimited<F>(
        &mut self,
        open: T,
        close: T,
        delimiter: Option<T>,
        recovery: TokenSet,
        cb: F,
    ) -> Result<(), Diagnostic>
    where
        F: FnMut(&mut Self, Marker) -> Result<(), (Marker, Diagnostic)>,
    {
        let open_span = self.expect(open)?;
        self.delimited(close, delimiter, recovery, cb);
        self.consume_close_token(open, open_span, close);
        Ok(())
    }

    /// Consumes a close token if it is the next token to be parsed.
    ///
    /// Otherwise, emits an "unmatched" diagnostic and synthesizes the close
    /// token into the parser's list of events.
    pub fn consume_close_token(&mut self, open: T, open_span: Span, close: T) {
        if self.next_if(close) {
            return;
        }

        let (found, span) = self
            .peek()
            .map(|(t, s)| (t.describe(), s))
            .unwrap_or_else(|| ("end of input", self.span()));

        self.diagnostic(unmatched(
            open.describe(),
            open_span,
            close.describe(),
            found,
            span,
        ));

        // Synthesize a close token event of zero width
        self.events.push(Event::Token {
            kind: close.into_syntax(),
            span: Span::new(span.start(), 0),
        });
    }

    /// Parses a delimited list of items until the given token.
    ///
    /// The provided recovery token set is used to recover within the delimited
    /// item list.
    ///
    /// The `until` token is not consumed.
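    ///
    /// A usage sketch for a comma-delimited list (the token names, recovery
    /// set, and `parse_member` are hypothetical):
    ///
    /// ```ignore
    /// parser.delimited(
    ///     Token::CloseBrace,
    ///     Some(Token::Comma),
    ///     MEMBER_RECOVERY_SET,
    ///     // `parse_member` completes the marker on success or returns it
    ///     // along with the error on failure.
    ///     |parser, marker| parse_member(parser, marker),
    /// );
    /// ```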
    pub fn delimited<F>(&mut self, until: T, delimiter: Option<T>, recovery: TokenSet, mut cb: F)
    where
        F: FnMut(&mut Self, Marker) -> Result<(), (Marker, Diagnostic)>,
    {
        let recovery = if let Some(delimiter) = delimiter {
            recovery.union(TokenSet::new(&[until.into_raw(), delimiter.into_raw()]))
        } else {
            recovery.union(TokenSet::new(&[until.into_raw()]))
        };

        let parent = self.recovery.last().copied();
        self.recovery.push(recovery);

        let mut next: Option<(T, Span)> = self.peek();
        while let Some((token, _)) = next {
            if token == until {
                break;
            }

            let mut lexer = self.lexer.clone();
            let marker = self.start();
            if let Err((marker, e)) = cb(self, marker) {
                if let Some((Ok(token), _)) = lexer.as_mut().expect("should have a lexer").peek()
                    && !recovery.contains(token.into_raw())
                {
                    // Determine if the token is recoverable in the parent recovery set
                    // If so, we'll restart where we first attempted to parse this item
                    if let Some(parent) = &parent
                        && parent.contains(token.into_raw())
                    {
                        // Truncate the event list and abandon the marker
                        self.events.truncate(marker.0);
                        marker.abandon(self);

                        // Clear any buffered events and reset the lexer
                        self.buffered.clear();
                        self.lexer = lexer;
                        break;
                    }
                }

                self.recover(e);
                marker.abandon(self);
            }

            next = self.peek();

            if let Some(delimiter) = delimiter
                && let Some((token, _)) = next
            {
                if token == until {
                    break;
                }

                if let Err(e) = self.expect(delimiter) {
                    // Attach a label to the diagnostic hinting at where we expected the
                    // delimiter to be; to do this, look back at the last non-trivia token event
                    // in the parser events and use its span for the label.
                    let e = if let Some(span) = self.events.iter().rev().find_map(|e| match e {
                        Event::Token { kind, span }
                            if *kind != SyntaxKind::Whitespace && *kind != SyntaxKind::Comment =>
                        {
                            Some(*span)
                        }
                        _ => None,
                    }) {
                        e.with_label(
                            format!(
                                "consider adding a {desc} after this",
                                desc = delimiter.describe()
                            ),
                            Span::new(span.end() - 1, 1),
                        )
                    } else {
                        e
                    };

                    self.recover(e);
                    self.next_if(delimiter);
                }

                next = self.peek();
            }
        }

        self.recovery.pop();
    }

    /// Adds a diagnostic to the parser output.
    pub fn diagnostic(&mut self, diagnostic: Diagnostic) {
        self.diagnostics.push(diagnostic);
    }

    /// Pushes a token set to the parser's recovery token set stack.
    pub fn push_recovery_set(&mut self, tokens: TokenSet) {
        self.recovery.push(tokens);
    }

    /// Pops a token set from the parser's recovery token set stack.
    ///
    /// # Panics
    ///
    /// Panics if the parser's recovery set stack is empty.
    pub fn pop_recovery_set(&mut self) {
        self.recovery.pop().expect("should pop");
    }

    /// Recovers from an error by consuming all tokens not in the top-most
    /// recovery set.
    ///
    /// # Panics
    ///
    /// Panics if a recovery set was not pushed with [Self::push_recovery_set].
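    ///
    /// A sketch of explicit recovery (the recovery set and `parse_thing` are
    /// hypothetical); [Self::recover_with_set] combines these steps for a
    /// one-off set:
    ///
    /// ```ignore
    /// parser.push_recovery_set(RECOVERY_SET);
    /// if let Err(e) = parse_thing(parser) {
    ///     // Skip tokens until one in `RECOVERY_SET` is reached.
    ///     parser.recover(e);
    /// }
    /// parser.pop_recovery_set();
    /// ```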
    pub fn recover(&mut self, mut diagnostic: Diagnostic) {
        let tokens = *self.recovery.last().expect("expected a top recovery set");

        while let Some((token, span)) = self.peek() {
            if tokens.contains(token.into_raw()) {
                break;
            }

            self.next().unwrap();

            // If the token starts an interpolation, then we need
            // to move past the entire set of tokens that are part
            // of the interpolation
            if T::recover_interpolation(token, span, self) {
                // If the diagnostic label started at this token, we need to extend its length
                // to cover the interpolation
                for label in diagnostic.labels_mut() {
                    let label_span = label.span();
                    if label_span.start() != span.start() {
                        continue;
                    }

                    // The label should include everything up to the current start
                    label.set_span(Span::new(
                        label_span.start(),
                        self.lexer
                            .as_ref()
                            .expect("should have a lexer")
                            .span()
                            .end()
                            - label_span.end()
                            + 1,
                    ));
                }
            }
        }

        self.diagnostics.push(diagnostic);
    }

    /// Performs recovery with the given recovery token set.
    pub fn recover_with_set(&mut self, diagnostic: Diagnostic, recovery: TokenSet) {
        self.recovery.push(recovery);
        self.recover(diagnostic);
        self.recovery.pop();
    }

    /// Starts a new node event.
    pub fn start(&mut self) -> Marker {
        // Peek before starting the node so that any trivia appears as siblings to this
        // node
        if !self.events.is_empty() {
            self.peek();

            // Append any buffered trivia before we start this node
            if !self.buffered.is_empty() {
                self.events.append(&mut self.buffered);
            }
        }

        let pos = self.events.len();
        self.events.push(Event::NodeStarted {
            kind: SyntaxKind::Abandoned,
            forward_parent: None,
        });
        Marker::new(pos)
    }

    /// Requires that the current token is the given token.
    ///
    /// # Panics
    ///
    /// Panics if the token is not the given token.
    pub fn require(&mut self, token: T) -> Span {
        match self.next() {
            Some((t, span)) if t == token => span,
            _ => panic!(
                "lexer not at required token {token}",
                token = token.describe()
            ),
        }
    }

    /// Requires that the current token is in the given token set.
    ///
    /// # Panics
    ///
    /// Panics if the token is not in the token set.
    pub fn require_in(&mut self, tokens: TokenSet) {
        match self.next() {
            Some((t, _)) if tokens.contains(t.into_raw()) => {}
            found => {
                let found = found.map(|(t, _)| t.describe());
                panic!(
                    "unexpected token {found}",
                    found = found.unwrap_or("end of input")
                );
            }
        }
    }

    /// Expects the next token to be the given token.
    ///
    /// Returns an error if the token is not the given token.
    pub fn expect(&mut self, token: T) -> Result<Span, Diagnostic> {
        match self.peek() {
            Some((t, span)) if t == token => {
                self.next();
                Ok(span)
            }
            found => {
                let (found, span) = found
                    .map(|(t, s)| (Some(t.describe()), s))
                    .unwrap_or_else(|| (None, self.span()));
                Err(expected_found(token.describe(), found, span))
            }
        }
    }

    /// Expects the next token to be the given token, but uses
    /// the provided name in the error.
    ///
    /// Returns an error if the token is not the given token.
    pub fn expect_with_name(&mut self, token: T, name: &'static str) -> Result<Span, Diagnostic> {
        match self.peek() {
            Some((t, span)) if t == token => {
                self.next();
                Ok(span)
            }
            found => {
                let (found, span) = found
                    .map(|(t, s)| (Some(t.describe()), s))
                    .unwrap_or_else(|| (None, self.span()));
                Err(expected_found(name, found, span))
            }
        }
    }

    /// Expects the next token to be in the given token set.
    ///
    /// Returns an error if the token is not in the given set.
    pub fn expect_in(
        &mut self,
        tokens: TokenSet,
        expected: &[&str],
    ) -> Result<(T, Span), Diagnostic> {
        match self.peek() {
            Some((t, span)) if tokens.contains(t.into_raw()) => {
                self.next();
                Ok((t, span))
            }
            found => {
                let (found, span) = found
                    .map(|(t, s)| (Some(t.describe()), s))
                    .unwrap_or_else(|| (None, self.span()));

                Err(expected_one_of(expected, found, span))
            }
        }
    }

    /// Used to interpolate strings with a different string interpolation token
    /// type.
    ///
    /// The provided callback receives an [Interpolator].
    ///
    /// The callback should use [Interpolator::into_parser] for the return
    /// value.
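    ///
    /// A sketch of the expected callback shape (`StringToken` and the body of
    /// the callback are hypothetical):
    ///
    /// ```ignore
    /// let result = parser.interpolate(|mut interpolator: Interpolator<'_, StringToken>| {
    ///     // ... drive the interpolator, pushing events and diagnostics ...
    ///     let parser = interpolator.into_parser();
    ///     (parser, ())
    /// });
    /// ```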
    pub fn interpolate<T2, F, R>(&mut self, cb: F) -> R
    where
        T2: Logos<'a, Source = str, Error = (), Extras = ()> + Copy,
        F: FnOnce(Interpolator<'a, T2>) -> (Parser<'a, T>, R),
    {
        let input = Interpolator {
            lexer: std::mem::take(&mut self.lexer)
                .expect("lexer should exist")
                .morph(),
            recovery: std::mem::take(&mut self.recovery),
            events: std::mem::take(&mut self.events),
            diagnostics: std::mem::take(&mut self.diagnostics),
            buffered: std::mem::take(&mut self.buffered),
        };
        let (p, result) = cb(input);
        *self = p;
        result
    }

    /// Morphs this parser into a parser for a new token type.
    ///
    /// The returned parser continues to point at the same span
    /// as the current parser.
    pub fn morph<T2>(self) -> Parser<'a, T2>
    where
        T2: ParserToken<'a>,
        T::Extras: Into<T2::Extras>,
    {
        Parser {
            lexer: self.lexer.map(|l| l.morph()),
            events: self.events,
            recovery: self.recovery,
            diagnostics: self.diagnostics,
            buffered: self.buffered,
        }
    }

    /// Consumes the parser and returns an interpolator.
    pub fn into_interpolator<T2>(self) -> Interpolator<'a, T2>
    where
        T2: Logos<'a, Source = str, Error = (), Extras = ()> + Copy,
    {
        Interpolator {
            lexer: self.lexer.expect("lexer should be present").morph(),
            events: self.events,
            recovery: self.recovery,
            diagnostics: self.diagnostics,
            buffered: self.buffered,
        }
    }

    /// Consumes the parser and returns the output.
    ///
    /// # Panics
    ///
    /// This method panics if buffered events remain in the parser.
    ///
    /// To ensure that no buffered events remain, call `next()` on the parser
    /// and verify it returns `None` before calling this method.
    pub fn finish(self) -> Output<'a, T> {
        assert!(
            self.buffered.is_empty(),
            "buffered events remain; ensure `next` was called after an unsuccessful peek"
        );

        Output {
            lexer: self.lexer.expect("lexer should be present"),
            events: self.events,
            diagnostics: self.diagnostics,
        }
    }

    /// Updates the syntax kind of the last token event.
    ///
    /// # Panics
    ///
    /// Panics if the last event was not a token.
    pub fn update_last_token_kind(&mut self, new_kind: SyntaxKind) {
        let last = self.events.last_mut().expect("expected a last event");
        match last {
            Event::Token { kind, .. } => *kind = new_kind,
            _ => panic!("the last event is not a token"),
        }
    }

    /// Consumes the remainder of the unparsed source into a special
    /// "unparsed" token.
    ///
    /// This occurs when a source file is missing a version statement or
    /// if the version specified is unsupported.
    pub fn consume_remainder(&mut self) {
        if !self.buffered.is_empty() {
            self.events.append(&mut self.buffered);
        }

        if let Some(span) = self
            .lexer
            .as_mut()
            .expect("there should be a lexer")
            .consume_remainder()
        {
            self.events.push(Event::Token {
                kind: SyntaxKind::Unparsed,
                span,
            });
        }
    }

    /// Consumes any trivia tokens by adding them to the event list.
    fn consume_trivia(
        &mut self,
        res: LexerResult<T>,
        span: Span,
        peeked: bool,
    ) -> Option<(T, Span)> {
        // If not peeked and there are buffered events, append them now
        if !peeked && !self.buffered.is_empty() {
            self.events.append(&mut self.buffered);
        }

        let event = match res {
            Ok(token) => {
                if !token.is_trivia() {
                    return Some((token, span));
                }

                Event::Token {
                    kind: token.into_syntax(),
                    span,
                }
            }
            Err(_) => {
                self.diagnostic(
                    Diagnostic::error("an unknown token was encountered")
                        .with_label(Self::unsupported_token_text(self.source(span)), span),
                );
                Event::Token {
                    kind: SyntaxKind::Unknown,
                    span,
                }
            }
        };

        if peeked {
            self.lexer.as_mut().expect("should have a lexer").next();
            self.buffered.push(event);
        } else {
            self.events.push(event);
        }
        None
    }

    /// A helper that provides the label text for an unsupported token error.
    fn unsupported_token_text(token: &str) -> &'static str {
        match token {
            "&" => "did you mean to use `&&` here?",
            "|" => "did you mean to use `||` here?",
            _ => "this is not a supported WDL token",
        }
    }
}

impl<'a, T> Iterator for Parser<'a, T>
where
    T: ParserToken<'a>,
{
    type Item = (T, Span);

    fn next(&mut self) -> Option<(T, Span)> {
        while let Some((res, span)) = self.lexer.as_mut()?.next() {
            if let Some((token, span)) = self.consume_trivia(res, span, false) {
                self.events.push(Event::Token {
                    kind: token.into_syntax(),
                    span,
                });
                return Some((token, span));
            }
        }

        if !self.buffered.is_empty() {
            self.events.append(&mut self.buffered);
        }

        None
    }
}