wdl_grammar/
tree.rs

1//! Module for the concrete syntax tree (CST) representation.
2
3pub mod dive;
4
5use std::borrow::Cow;
6use std::collections::VecDeque;
7use std::fmt;
8use std::iter;
9
10use itertools::Either;
11use rowan::Direction;
12use rowan::GreenNodeBuilder;
13use rowan::GreenNodeData;
14use strum::VariantArray;
15
16use super::Diagnostic;
17use super::grammar;
18use super::lexer::Lexer;
19use super::parser::Event;
20use crate::parser::Parser;
21
22/// Represents the kind of syntax element (node or token) in a WDL concrete
23/// syntax tree (CST).
24///
25/// Nodes have at least one token child and represent a syntactic construct.
26///
27/// Tokens are terminal and represent any span of the source.
28///
29/// This enumeration is a union of all supported WDL tokens and nodes.
30#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, VariantArray)]
31#[repr(u16)]
32pub enum SyntaxKind {
33    /// The token is unknown to WDL.
34    Unknown,
35    /// The token represents unparsed source.
36    ///
37    /// Unparsed source occurs in WDL source files with unsupported versions.
38    Unparsed,
39    /// A whitespace token.
40    Whitespace,
41    /// A comment token.
42    Comment,
43    /// A WDL version token.
44    Version,
45    /// A literal float token.
46    Float,
47    /// A literal integer token.
48    Integer,
49    /// An identifier token.
50    Ident,
51    /// A single quote token.
52    SingleQuote,
53    /// A double quote token.
54    DoubleQuote,
55    /// An open heredoc token.
56    OpenHeredoc,
57    /// A close heredoc token.
58    CloseHeredoc,
59    /// The `Array` type keyword token.
60    ArrayTypeKeyword,
61    /// The `Boolean` type keyword token.
62    BooleanTypeKeyword,
63    /// The `File` type keyword token.
64    FileTypeKeyword,
65    /// The `Float` type keyword token.
66    FloatTypeKeyword,
67    /// The `Int` type keyword token.
68    IntTypeKeyword,
69    /// The `Map` type keyword token.
70    MapTypeKeyword,
71    /// The `Object` type keyword token.
72    ObjectTypeKeyword,
73    /// The `Pair` type keyword token.
74    PairTypeKeyword,
75    /// The `String` type keyword token.
76    StringTypeKeyword,
77    /// The `after` keyword token.
78    AfterKeyword,
79    /// The `alias` keyword token.
80    AliasKeyword,
81    /// The `as` keyword token.
82    AsKeyword,
83    /// The `call` keyword token.
84    CallKeyword,
85    /// The `command` keyword token.
86    CommandKeyword,
87    /// The `else` keyword token.
88    ElseKeyword,
89    /// The `env` keyword token.
90    EnvKeyword,
91    /// The `false` keyword token.
92    FalseKeyword,
93    /// The `if` keyword token.
94    IfKeyword,
95    /// The `in` keyword token.
96    InKeyword,
97    /// The `import` keyword token.
98    ImportKeyword,
99    /// The `input` keyword token.
100    InputKeyword,
101    /// The `meta` keyword token.
102    MetaKeyword,
103    /// The `None` keyword.
104    NoneKeyword,
105    /// The `null` keyword token.
106    NullKeyword,
107    /// The `object` keyword token.
108    ObjectKeyword,
109    /// The `output` keyword token.
110    OutputKeyword,
111    /// The `parameter_meta` keyword token.
112    ParameterMetaKeyword,
113    /// The `runtime` keyword token.
114    RuntimeKeyword,
115    /// The `scatter` keyword token.
116    ScatterKeyword,
117    /// The `struct` keyword token.
118    StructKeyword,
119    /// The `task` keyword token.
120    TaskKeyword,
121    /// The `then` keyword token.
122    ThenKeyword,
123    /// The `true` keyword token.
124    TrueKeyword,
125    /// The `version` keyword token.
126    VersionKeyword,
127    /// The `workflow` keyword token.
128    WorkflowKeyword,
129    /// The 1.2 `Directory` type keyword token.
130    DirectoryTypeKeyword,
131    /// The 1.2 `hints` keyword token.
132    HintsKeyword,
133    /// The 1.2 `requirements` keyword token.
134    RequirementsKeyword,
135    /// The `{` symbol token.
136    OpenBrace,
137    /// The `}` symbol token.
138    CloseBrace,
139    /// The `[` symbol token.
140    OpenBracket,
141    /// The `]` symbol token.
142    CloseBracket,
143    /// The `=` symbol token.
144    Assignment,
145    /// The `:` symbol token.
146    Colon,
147    /// The `,` symbol token.
148    Comma,
149    /// The `(` symbol token.
150    OpenParen,
151    /// The `)` symbol token.
152    CloseParen,
153    /// The `?` symbol token.
154    QuestionMark,
155    /// The `!` symbol token.
156    Exclamation,
157    /// The `+` symbol token.
158    Plus,
159    /// The `-` symbol token.
160    Minus,
161    /// The `||` symbol token.
162    LogicalOr,
163    /// The `&&` symbol token.
164    LogicalAnd,
165    /// The `*` symbol token.
166    Asterisk,
167    /// The `**` symbol token.
168    Exponentiation,
169    /// The `/` symbol token.
170    Slash,
171    /// The `%` symbol token.
172    Percent,
173    /// The `==` symbol token.
174    Equal,
175    /// The `!=` symbol token.
176    NotEqual,
177    /// The `<=` symbol token.
178    LessEqual,
179    /// The `>=` symbol token.
180    GreaterEqual,
181    /// The `<` symbol token.
182    Less,
183    /// The `>` symbol token.
184    Greater,
185    /// The `.` symbol token.
186    Dot,
187    /// A literal text part of a string.
188    LiteralStringText,
189    /// A literal text part of a command.
190    LiteralCommandText,
191    /// A placeholder open token.
192    PlaceholderOpen,
193
194    /// Abandoned nodes are nodes that encountered errors.
195    ///
196    /// Children of abandoned nodes are re-parented to the parent of
197    /// the abandoned node.
198    ///
199    /// As this is an internal implementation of error recovery,
200    /// hide this variant from the documentation.
201    #[doc(hidden)]
202    Abandoned,
203    /// Represents the WDL document root node.
204    RootNode,
205    /// Represents a version statement node.
206    VersionStatementNode,
207    /// Represents an import statement node.
208    ImportStatementNode,
209    /// Represents an import alias node.
210    ImportAliasNode,
211    /// Represents a struct definition node.
212    StructDefinitionNode,
213    /// Represents a task definition node.
214    TaskDefinitionNode,
215    /// Represents a workflow definition node.
216    WorkflowDefinitionNode,
217    /// Represents an unbound declaration node.
218    UnboundDeclNode,
219    /// Represents a bound declaration node.
220    BoundDeclNode,
221    /// Represents an input section node.
222    InputSectionNode,
223    /// Represents an output section node.
224    OutputSectionNode,
225    /// Represents a command section node.
226    CommandSectionNode,
227    /// Represents a requirements section node.
228    RequirementsSectionNode,
229    /// Represents a requirements item node.
230    RequirementsItemNode,
231    /// Represents a hints section node in a task.
232    TaskHintsSectionNode,
233    /// Represents a hints section node in a workflow.
234    WorkflowHintsSectionNode,
235    /// Represents a hints item node in a task.
236    TaskHintsItemNode,
237    /// Represents a hints item node in a workflow.
238    WorkflowHintsItemNode,
239    /// Represents a literal object in a workflow hints item value.
240    WorkflowHintsObjectNode,
241    /// Represents an item in a workflow hints object.
242    WorkflowHintsObjectItemNode,
243    /// Represents a literal array in a workflow hints item value.
244    WorkflowHintsArrayNode,
245    /// Represents a runtime section node.
246    RuntimeSectionNode,
247    /// Represents a runtime item node.
248    RuntimeItemNode,
249    /// Represents a primitive type node.
250    PrimitiveTypeNode,
251    /// Represents a map type node.
252    MapTypeNode,
253    /// Represents an array type node.
254    ArrayTypeNode,
255    /// Represents a pair type node.
256    PairTypeNode,
257    /// Represents an object type node.
258    ObjectTypeNode,
259    /// Represents a type reference node.
260    TypeRefNode,
261    /// Represents a metadata section node.
262    MetadataSectionNode,
263    /// Represents a parameter metadata section node.
264    ParameterMetadataSectionNode,
265    /// Represents a metadata object item node.
266    MetadataObjectItemNode,
267    /// Represents a metadata object node.
268    MetadataObjectNode,
269    /// Represents a metadata array node.
270    MetadataArrayNode,
271    /// Represents a literal integer node.
272    LiteralIntegerNode,
273    /// Represents a literal float node.
274    LiteralFloatNode,
275    /// Represents a literal boolean node.
276    LiteralBooleanNode,
277    /// Represents a literal `None` node.
278    LiteralNoneNode,
279    /// Represents a literal null node.
280    LiteralNullNode,
281    /// Represents a literal string node.
282    LiteralStringNode,
283    /// Represents a literal pair node.
284    LiteralPairNode,
285    /// Represents a literal array node.
286    LiteralArrayNode,
287    /// Represents a literal map node.
288    LiteralMapNode,
289    /// Represents a literal map item node.
290    LiteralMapItemNode,
291    /// Represents a literal object node.
292    LiteralObjectNode,
293    /// Represents a literal object item node.
294    LiteralObjectItemNode,
295    /// Represents a literal struct node.
296    LiteralStructNode,
297    /// Represents a literal struct item node.
298    LiteralStructItemNode,
299    /// Represents a literal hints node.
300    LiteralHintsNode,
301    /// Represents a literal hints item node.
302    LiteralHintsItemNode,
303    /// Represents a literal input node.
304    LiteralInputNode,
305    /// Represents a literal input item node.
306    LiteralInputItemNode,
307    /// Represents a literal output node.
308    LiteralOutputNode,
309    /// Represents a literal output item node.
310    LiteralOutputItemNode,
311    /// Represents a parenthesized expression node.
312    ParenthesizedExprNode,
313    /// Represents a name reference expression node.
314    NameRefExprNode,
315    /// Represents an `if` expression node.
316    IfExprNode,
317    /// Represents a logical not expression node.
318    LogicalNotExprNode,
319    /// Represents a negation expression node.
320    NegationExprNode,
321    /// Represents a logical `OR` expression node.
322    LogicalOrExprNode,
323    /// Represents a logical `AND` expression node.
324    LogicalAndExprNode,
325    /// Represents an equality expression node.
326    EqualityExprNode,
327    /// Represents an inequality expression node.
328    InequalityExprNode,
329    /// Represents a "less than" expression node.
330    LessExprNode,
331    /// Represents a "less than or equal to" expression node.
332    LessEqualExprNode,
333    /// Represents a "greater than" expression node.
334    GreaterExprNode,
335    /// Represents a "greater than or equal to" expression node.
336    GreaterEqualExprNode,
337    /// Represents an addition expression node.
338    AdditionExprNode,
339    /// Represents a subtraction expression node.
340    SubtractionExprNode,
341    /// Represents a multiplication expression node.
342    MultiplicationExprNode,
343    /// Represents a division expression node.
344    DivisionExprNode,
345    /// Represents a modulo expression node.
346    ModuloExprNode,
347    /// Represents a exponentiation expr node.
348    ExponentiationExprNode,
349    /// Represents a call expression node.'
350    CallExprNode,
351    /// Represents an index expression node.
352    IndexExprNode,
353    /// Represents an an access expression node.
354    AccessExprNode,
355    /// Represents a placeholder node in a string literal.
356    PlaceholderNode,
357    /// Placeholder `sep` option node.
358    PlaceholderSepOptionNode,
359    /// Placeholder `default` option node.
360    PlaceholderDefaultOptionNode,
361    /// Placeholder `true`/`false` option node.
362    PlaceholderTrueFalseOptionNode,
363    /// Represents a conditional statement node.
364    ConditionalStatementNode,
365    /// Represents a scatter statement node.
366    ScatterStatementNode,
367    /// Represents a call statement node.
368    CallStatementNode,
369    /// Represents a call target node in a call statement.
370    CallTargetNode,
371    /// Represents a call alias node in a call statement.
372    CallAliasNode,
373    /// Represents an `after` clause node in a call statement.
374    CallAfterNode,
375    /// Represents a call input item node.
376    CallInputItemNode,
377
378    // WARNING: this must always be the last variant.
379    /// The exclusive maximum syntax kind value.
380    MAX,
381}
382
383impl SyntaxKind {
384    /// Returns whether the token is a symbolic [`SyntaxKind`].
385    ///
386    /// Generally speaking, symbolic [`SyntaxKind`]s have special meanings
387    /// during parsing—they are not real elements of the grammar but rather an
388    /// implementation detail.
389    pub fn is_symbolic(&self) -> bool {
390        matches!(
391            self,
392            Self::Abandoned | Self::Unknown | Self::Unparsed | Self::MAX
393        )
394    }
395
396    /// Describes the syntax kind.
397    pub fn describe(&self) -> &'static str {
398        match self {
399            Self::Unknown => unreachable!(),
400            Self::Unparsed => unreachable!(),
401            Self::Whitespace => "whitespace",
402            Self::Comment => "comment",
403            Self::Version => "version",
404            Self::Float => "float",
405            Self::Integer => "integer",
406            Self::Ident => "identifier",
407            Self::SingleQuote => "single quote",
408            Self::DoubleQuote => "double quote",
409            Self::OpenHeredoc => "open heredoc",
410            Self::CloseHeredoc => "close heredoc",
411            Self::ArrayTypeKeyword => "`Array` type keyword",
412            Self::BooleanTypeKeyword => "`Boolean` type keyword",
413            Self::FileTypeKeyword => "`File` type keyword",
414            Self::FloatTypeKeyword => "`Float` type keyword",
415            Self::IntTypeKeyword => "`Int` type keyword",
416            Self::MapTypeKeyword => "`Map` type keyword",
417            Self::ObjectTypeKeyword => "`Object` type keyword",
418            Self::PairTypeKeyword => "`Pair` type keyword",
419            Self::StringTypeKeyword => "`String` type keyword",
420            Self::AfterKeyword => "`after` keyword",
421            Self::AliasKeyword => "`alias` keyword",
422            Self::AsKeyword => "`as` keyword",
423            Self::CallKeyword => "`call` keyword",
424            Self::CommandKeyword => "`command` keyword",
425            Self::ElseKeyword => "`else` keyword",
426            Self::EnvKeyword => "`env` keyword",
427            Self::FalseKeyword => "`false` keyword",
428            Self::IfKeyword => "`if` keyword",
429            Self::InKeyword => "`in` keyword",
430            Self::ImportKeyword => "`import` keyword",
431            Self::InputKeyword => "`input` keyword",
432            Self::MetaKeyword => "`meta` keyword",
433            Self::NoneKeyword => "`None` keyword",
434            Self::NullKeyword => "`null` keyword",
435            Self::ObjectKeyword => "`object` keyword",
436            Self::OutputKeyword => "`output` keyword",
437            Self::ParameterMetaKeyword => "`parameter_meta` keyword",
438            Self::RuntimeKeyword => "`runtime` keyword",
439            Self::ScatterKeyword => "`scatter` keyword",
440            Self::StructKeyword => "`struct` keyword",
441            Self::TaskKeyword => "`task` keyword",
442            Self::ThenKeyword => "`then` keyword",
443            Self::TrueKeyword => "`true` keyword",
444            Self::VersionKeyword => "`version` keyword",
445            Self::WorkflowKeyword => "`workflow` keyword",
446            Self::DirectoryTypeKeyword => "`Directory` type keyword",
447            Self::HintsKeyword => "`hints` keyword",
448            Self::RequirementsKeyword => "`requirements` keyword",
449            Self::OpenBrace => "`{` symbol",
450            Self::CloseBrace => "`}` symbol",
451            Self::OpenBracket => "`[` symbol",
452            Self::CloseBracket => "`]` symbol",
453            Self::Assignment => "`=` symbol",
454            Self::Colon => "`:` symbol",
455            Self::Comma => "`,` symbol",
456            Self::OpenParen => "`(` symbol",
457            Self::CloseParen => "`)` symbol",
458            Self::QuestionMark => "`?` symbol",
459            Self::Exclamation => "`!` symbol",
460            Self::Plus => "`+` symbol",
461            Self::Minus => "`-` symbol",
462            Self::LogicalOr => "`||` symbol",
463            Self::LogicalAnd => "`&&` symbol",
464            Self::Asterisk => "`*` symbol",
465            Self::Exponentiation => "`**` symbol",
466            Self::Slash => "`/` symbol",
467            Self::Percent => "`%` symbol",
468            Self::Equal => "`==` symbol",
469            Self::NotEqual => "`!=` symbol",
470            Self::LessEqual => "`<=` symbol",
471            Self::GreaterEqual => "`>=` symbol",
472            Self::Less => "`<` symbol",
473            Self::Greater => "`>` symbol",
474            Self::Dot => "`.` symbol",
475            Self::LiteralStringText => "literal string text",
476            Self::LiteralCommandText => "literal command text",
477            Self::PlaceholderOpen => "placeholder open",
478            Self::Abandoned => unreachable!(),
479            Self::RootNode => "root node",
480            Self::VersionStatementNode => "version statement",
481            Self::ImportStatementNode => "import statement",
482            Self::ImportAliasNode => "import alias",
483            Self::StructDefinitionNode => "struct definition",
484            Self::TaskDefinitionNode => "task definition",
485            Self::WorkflowDefinitionNode => "workflow definition",
486            Self::UnboundDeclNode => "declaration without assignment",
487            Self::BoundDeclNode => "declaration with assignment",
488            Self::InputSectionNode => "input section",
489            Self::OutputSectionNode => "output section",
490            Self::CommandSectionNode => "command section",
491            Self::RequirementsSectionNode => "requirements section",
492            Self::RequirementsItemNode => "requirements item",
493            Self::TaskHintsSectionNode | Self::WorkflowHintsSectionNode => "hints section",
494            Self::TaskHintsItemNode | Self::WorkflowHintsItemNode => "hints item",
495            Self::WorkflowHintsObjectNode => "literal object",
496            Self::WorkflowHintsObjectItemNode => "literal object item",
497            Self::WorkflowHintsArrayNode => "literal array",
498            Self::RuntimeSectionNode => "runtime section",
499            Self::RuntimeItemNode => "runtime item",
500            Self::PrimitiveTypeNode => "primitive type",
501            Self::MapTypeNode => "map type",
502            Self::ArrayTypeNode => "array type",
503            Self::PairTypeNode => "pair type",
504            Self::ObjectTypeNode => "object type",
505            Self::TypeRefNode => "type reference",
506            Self::MetadataSectionNode => "metadata section",
507            Self::ParameterMetadataSectionNode => "parameter metadata section",
508            Self::MetadataObjectItemNode => "metadata object item",
509            Self::MetadataObjectNode => "metadata object",
510            Self::MetadataArrayNode => "metadata array",
511            Self::LiteralIntegerNode => "literal integer",
512            Self::LiteralFloatNode => "literal float",
513            Self::LiteralBooleanNode => "literal boolean",
514            Self::LiteralNoneNode => "literal `None`",
515            Self::LiteralNullNode => "literal null",
516            Self::LiteralStringNode => "literal string",
517            Self::LiteralPairNode => "literal pair",
518            Self::LiteralArrayNode => "literal array",
519            Self::LiteralMapNode => "literal map",
520            Self::LiteralMapItemNode => "literal map item",
521            Self::LiteralObjectNode => "literal object",
522            Self::LiteralObjectItemNode => "literal object item",
523            Self::LiteralStructNode => "literal struct",
524            Self::LiteralStructItemNode => "literal struct item",
525            Self::LiteralHintsNode => "literal hints",
526            Self::LiteralHintsItemNode => "literal hints item",
527            Self::LiteralInputNode => "literal input",
528            Self::LiteralInputItemNode => "literal input item",
529            Self::LiteralOutputNode => "literal output",
530            Self::LiteralOutputItemNode => "literal output item",
531            Self::ParenthesizedExprNode => "parenthesized expression",
532            Self::NameRefExprNode => "name reference expression",
533            Self::IfExprNode => "`if` expression",
534            Self::LogicalNotExprNode => "logical not expression",
535            Self::NegationExprNode => "negation expression",
536            Self::LogicalOrExprNode => "logical OR expression",
537            Self::LogicalAndExprNode => "logical AND expression",
538            Self::EqualityExprNode => "equality expression",
539            Self::InequalityExprNode => "inequality expression",
540            Self::LessExprNode => "less than expression",
541            Self::LessEqualExprNode => "less than or equal to expression",
542            Self::GreaterExprNode => "greater than expression",
543            Self::GreaterEqualExprNode => "greater than or equal to expression",
544            Self::AdditionExprNode => "addition expression",
545            Self::SubtractionExprNode => "subtraction expression",
546            Self::MultiplicationExprNode => "multiplication expression",
547            Self::DivisionExprNode => "division expression",
548            Self::ModuloExprNode => "modulo expression",
549            Self::ExponentiationExprNode => "exponentiation expression",
550            Self::CallExprNode => "call expression",
551            Self::IndexExprNode => "index expression",
552            Self::AccessExprNode => "access expression",
553            Self::PlaceholderNode => "placeholder",
554            Self::PlaceholderSepOptionNode => "placeholder `sep` option",
555            Self::PlaceholderDefaultOptionNode => "placeholder `default` option",
556            Self::PlaceholderTrueFalseOptionNode => "placeholder `true`/`false` option",
557            Self::ConditionalStatementNode => "conditional statement",
558            Self::ScatterStatementNode => "scatter statement",
559            Self::CallStatementNode => "call statement",
560            Self::CallTargetNode => "call target",
561            Self::CallAliasNode => "call alias",
562            Self::CallAfterNode => "call `after` clause",
563            Self::CallInputItemNode => "call input item",
564            Self::MAX => unreachable!(),
565        }
566    }
567
568    /// Returns whether the [`SyntaxKind`] is trivia.
569    pub fn is_trivia(&self) -> bool {
570        matches!(self, Self::Whitespace | Self::Comment)
571    }
572}
573
574/// Every [`SyntaxKind`] variant.
575pub static ALL_SYNTAX_KIND: &[SyntaxKind] = SyntaxKind::VARIANTS;
576
577impl From<SyntaxKind> for rowan::SyntaxKind {
578    fn from(kind: SyntaxKind) -> Self {
579        rowan::SyntaxKind(kind as u16)
580    }
581}
582
/// Represents the Workflow Description Language (WDL).
///
/// This is a marker type that implements [`rowan::Language`] so that rowan's
/// generic syntax tree types can be specialized for WDL.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct WorkflowDescriptionLanguage;
586
587impl rowan::Language for WorkflowDescriptionLanguage {
588    type Kind = SyntaxKind;
589
590    fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind {
591        assert!(raw.0 <= SyntaxKind::MAX as u16);
592        unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
593    }
594
595    fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind {
596        kind.into()
597    }
598}
599
600/// Represents a node in the concrete syntax tree.
601pub type SyntaxNode = rowan::SyntaxNode<WorkflowDescriptionLanguage>;
602/// Represents a token in the concrete syntax tree.
603pub type SyntaxToken = rowan::SyntaxToken<WorkflowDescriptionLanguage>;
604/// Represents an element (node or token) in the concrete syntax tree.
605pub type SyntaxElement = rowan::SyntaxElement<WorkflowDescriptionLanguage>;
606/// Represents node children in the concrete syntax tree.
607pub type SyntaxNodeChildren = rowan::SyntaxNodeChildren<WorkflowDescriptionLanguage>;
608
609/// Constructs a concrete syntax tree from a list of parser events.
610pub fn construct_tree(source: &str, mut events: Vec<Event>) -> SyntaxNode {
611    let mut builder = GreenNodeBuilder::default();
612    let mut ancestors = Vec::new();
613
614    for i in 0..events.len() {
615        match std::mem::replace(&mut events[i], Event::abandoned()) {
616            Event::NodeStarted {
617                kind,
618                forward_parent,
619            } => {
620                // Walk the forward parent chain, if there is one, and push
621                // each forward parent to the ancestors list
622                ancestors.push(kind);
623                let mut idx = i;
624                let mut fp: Option<usize> = forward_parent;
625                while let Some(distance) = fp {
626                    idx += distance;
627                    fp = match std::mem::replace(&mut events[idx], Event::abandoned()) {
628                        Event::NodeStarted {
629                            kind,
630                            forward_parent,
631                        } => {
632                            ancestors.push(kind);
633                            forward_parent
634                        }
635                        _ => unreachable!(),
636                    };
637                }
638
639                // As the current node was pushed first and then its ancestors, walk
640                // the list in reverse to start the "oldest" ancestor first
641                for kind in ancestors.drain(..).rev() {
642                    if kind != SyntaxKind::Abandoned {
643                        builder.start_node(kind.into());
644                    }
645                }
646            }
647            Event::NodeFinished => builder.finish_node(),
648            Event::Token { kind, span } => {
649                builder.token(kind.into(), &source[span.start()..span.end()])
650            }
651        }
652    }
653
654    SyntaxNode::new_root(builder.finish())
655}
656
657/// Represents an untyped concrete syntax tree.
658#[derive(Clone, PartialEq, Eq, Hash)]
659pub struct SyntaxTree(SyntaxNode);
660
661impl SyntaxTree {
662    /// Parses WDL source to produce a syntax tree.
663    ///
664    /// A syntax tree is always returned, even for invalid WDL documents.
665    ///
666    /// Additionally, the list of diagnostics encountered during the parse is
667    /// returned; if the list is empty, the tree is syntactically correct.
668    ///
669    /// However, additional validation is required to ensure the source is
670    /// a valid WDL document.
671    ///
672    /// # Example
673    ///
674    /// ```rust
675    /// # use wdl_grammar::SyntaxTree;
676    /// let (tree, diagnostics) = SyntaxTree::parse("version 1.1");
677    /// assert!(diagnostics.is_empty());
678    /// println!("{tree:#?}");
679    /// ```
680    pub fn parse(source: &str) -> (Self, Vec<Diagnostic>) {
681        let parser = Parser::new(Lexer::new(source));
682        let (events, mut diagnostics) = grammar::document(source, parser);
683        diagnostics.sort();
684        (Self(construct_tree(source, events)), diagnostics)
685    }
686
687    /// Gets the root syntax node of the tree.
688    pub fn root(&self) -> &SyntaxNode {
689        &self.0
690    }
691
692    /// Gets a copy of the underlying root green node for the tree.
693    pub fn green(&self) -> Cow<'_, GreenNodeData> {
694        self.0.green()
695    }
696
697    /// Converts the tree into a syntax node.
698    pub fn into_syntax(self) -> SyntaxNode {
699        self.0
700    }
701}
702
703impl fmt::Display for SyntaxTree {
704    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
705        self.0.fmt(f)
706    }
707}
708
709impl fmt::Debug for SyntaxTree {
710    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
711        self.0.fmt(f)
712    }
713}
714
715/// An extension trait for [`SyntaxNode`]s, [`SyntaxToken`]s, and
716/// [`SyntaxElement`]s.
717pub trait SyntaxExt {
718    /// Returns whether `self` matches the provided element.
719    fn matches(&self, other: &SyntaxElement) -> bool;
720
721    /// Gets the parent of the element.
722    ///
723    /// Returns `None` for the root node.
724    fn parent(&self) -> Option<SyntaxNode>;
725
726    /// Gets the child index of the element.
727    fn index(&self) -> usize;
728
729    /// Gets the siblings with tokens.
730    ///
731    /// **NOTE:** this is needed because Rowan does not encapsulate this
732    /// functionality in a trait. Once wrapped here, most of the functions
733    /// provided by this extension trait can just be provided, which simplifies
734    /// the code. Generally speaking, this should just defer to the underlying
735    /// `siblings_with_tokens` method for each type.
736    fn siblings_with_tokens(&self, direction: Direction) -> impl Iterator<Item = SyntaxElement>;
737
738    /// Returns all of the siblings _before_ the current element.
739    ///
740    /// The siblings are returned in the order they were parsed.
741    fn preceding_siblings(&self) -> impl Iterator<Item = SyntaxElement> {
742        let index = self.index();
743        self.parent()
744            .into_iter()
745            .flat_map(move |p| p.children_with_tokens().take(index))
746    }
747
748    /// Returns all of the siblings _after_ the current element.
749    ///
750    /// The siblings are returned in the order they were parsed.
751    fn succeeding_siblings(&self) -> impl Iterator<Item = SyntaxElement> {
752        self.siblings_with_tokens(Direction::Next)
753            // NOTE: this `skip` is necessary because `siblings_with_tokens` returns the current
754            // node.
755            .skip(1)
756    }
757
758    /// Gets all elements that are adjacent to a particular element (not
759    /// including the element itself). This means in both the forward and
760    /// reverse direction.
761    ///
762    /// The siblings are returned in the order they were parsed.
763    fn adjacent(&self) -> impl Iterator<Item = SyntaxElement> {
764        self.preceding_siblings().chain(self.succeeding_siblings())
765    }
766}
767
768impl SyntaxExt for SyntaxNode {
769    fn matches(&self, other: &SyntaxElement) -> bool {
770        other.as_node().map(|n| n == self).unwrap_or(false)
771    }
772
773    fn siblings_with_tokens(&self, direction: Direction) -> impl Iterator<Item = SyntaxElement> {
774        self.siblings_with_tokens(direction)
775    }
776
777    fn parent(&self) -> Option<SyntaxNode> {
778        self.parent()
779    }
780
781    fn index(&self) -> usize {
782        self.index()
783    }
784}
785
786impl SyntaxExt for SyntaxToken {
787    fn matches(&self, other: &SyntaxElement) -> bool {
788        other.as_token().map(|n| n == self).unwrap_or(false)
789    }
790
791    fn siblings_with_tokens(&self, direction: Direction) -> impl Iterator<Item = SyntaxElement> {
792        self.siblings_with_tokens(direction)
793    }
794
795    fn parent(&self) -> Option<SyntaxNode> {
796        self.parent()
797    }
798
799    fn index(&self) -> usize {
800        self.index()
801    }
802}
803
804impl SyntaxExt for SyntaxElement {
805    fn matches(&self, other: &SyntaxElement) -> bool {
806        self == other
807    }
808
809    fn siblings_with_tokens(&self, direction: Direction) -> impl Iterator<Item = SyntaxElement> {
810        match self {
811            SyntaxElement::Node(node) => Either::Left(node.siblings_with_tokens(direction)),
812            SyntaxElement::Token(token) => Either::Right(token.siblings_with_tokens(direction)),
813        }
814    }
815
816    fn parent(&self) -> Option<SyntaxNode> {
817        self.parent()
818    }
819
820    fn index(&self) -> usize {
821        self.index()
822    }
823}
824
/// An extension trait for [`SyntaxToken`]s.
///
/// The methods here inspect the trivia tokens surrounding a token. In the
/// implementation for [`SyntaxToken`], "substantial" trivia means comments
/// and whitespace that contains more than one newline.
pub trait SyntaxTokenExt {
    /// Gets all of the substantial trivia that precedes this token.
    ///
    /// In the implementation for [`SyntaxToken`], the tokens are yielded in
    /// source order and collection stops at the first non-trivia token or at
    /// a comment that is not on its own line.
    fn preceding_trivia(&self) -> impl Iterator<Item = SyntaxToken>;

    /// Gets all of the substantial trivia that succeeds this token.
    ///
    /// In the implementation for [`SyntaxToken`], collection stops at the
    /// first non-trivia token.
    fn succeeding_trivia(&self) -> impl Iterator<Item = SyntaxToken>;

    /// Gets any inline comment directly following this token on the
    /// same line.
    ///
    /// Returns `None` when no comment follows before the end of the line.
    fn inline_comment(&self) -> Option<SyntaxToken>;
}
837
838impl SyntaxTokenExt for SyntaxToken {
839    fn preceding_trivia(&self) -> impl Iterator<Item = SyntaxToken> {
840        let mut tokens = VecDeque::new();
841        let mut cur = self.prev_token();
842        while let Some(token) = cur {
843            cur = token.prev_token();
844            // Stop at first non-trivia
845            if !token.kind().is_trivia() {
846                break;
847            }
848            // Stop if a comment is not on its own line
849            if token.kind() == SyntaxKind::Comment {
850                if let Some(prev) = token.prev_token() {
851                    if prev.kind() == SyntaxKind::Whitespace {
852                        let has_newlines = prev.text().chars().any(|c| c == '\n');
853                        // If there are newlines in 'prev' then we know
854                        // that the comment is on its own line.
855                        // The comment may still be on its own line if
856                        // 'prev' does not have newlines and nothing comes
857                        // before 'prev'.
858                        if !has_newlines && prev.prev_token().is_some() {
859                            break;
860                        }
861                    } else {
862                        // There is something else on this line before the comment.
863                        break;
864                    }
865                }
866            }
867            // Filter out whitespace that is not substantial
868            match token.kind() {
869                SyntaxKind::Whitespace
870                    if token.text().chars().filter(|c| *c == '\n').count() > 1 =>
871                {
872                    tokens.push_front(token);
873                }
874                SyntaxKind::Comment => {
875                    tokens.push_front(token);
876                }
877                _ => {}
878            }
879        }
880        tokens.into_iter()
881    }
882
883    fn succeeding_trivia(&self) -> impl Iterator<Item = SyntaxToken> {
884        let mut next = self.next_token();
885        iter::from_fn(move || {
886            let cur = next.clone()?;
887            next = cur.next_token();
888            Some(cur)
889        })
890        .take_while(|t| {
891            // Stop at first non-trivia
892            t.kind().is_trivia()
893        })
894        .filter(|t| {
895            // Filter out whitespace that is not substantial
896            if t.kind() == SyntaxKind::Whitespace {
897                return t.text().chars().filter(|c| *c == '\n').count() > 1;
898            }
899            true
900        })
901    }
902
903    fn inline_comment(&self) -> Option<SyntaxToken> {
904        let mut next = self.next_token();
905        iter::from_fn(move || {
906            let cur = next.clone()?;
907            next = cur.next_token();
908            Some(cur)
909        })
910        .take_while(|t| {
911            // Stop at non-trivia
912            if !t.kind().is_trivia() {
913                return false;
914            }
915            // Stop on first whitespace containing a newline
916            if t.kind() == SyntaxKind::Whitespace {
917                return !t.text().chars().any(|c| c == '\n');
918            }
919            true
920        })
921        .find(|t| t.kind() == SyntaxKind::Comment)
922    }
923}
924
#[cfg(test)]
mod tests {
    use super::*;
    use crate::SyntaxTree;

    /// Parses `source`, checks that parsing produced no diagnostics, and
    /// returns the last child of the root, which must be a workflow
    /// definition node.
    fn parse_trailing_workflow(source: &str) -> SyntaxNode {
        let (tree, diagnostics) = SyntaxTree::parse(source);
        assert!(diagnostics.is_empty());

        let workflow = tree.root().last_child().unwrap();
        assert_eq!(workflow.kind(), SyntaxKind::WorkflowDefinitionNode);
        workflow
    }

    #[test]
    fn preceding_comments() {
        let workflow = parse_trailing_workflow(
            "version 1.2

# This comment should not be included
task foo {} # This comment should not be included

# Some
# comments
# are
# long
    
# Others are short

#     and, yet    another
workflow foo {} # This should not be collected.

# This comment should not be included either.",
        );

        let first = workflow.first_token().unwrap();
        let texts: Vec<String> = first
            .preceding_trivia()
            .map(|t| t.text().to_string())
            .collect();

        assert_eq!(
            texts,
            [
                "\n\n",
                "# Some",
                "# comments",
                "# are",
                "# long",
                "\n    \n",
                "# Others are short",
                "\n\n",
                "#     and, yet    another",
            ]
        );
    }

    #[test]
    fn succeeding_comments() {
        let workflow = parse_trailing_workflow(
            "version 1.2

# This comment should not be included
task foo {}

# This should not be collected.
workflow foo {} # Here is a comment that should be collected.

# This comment should be included too.",
        );

        let last = workflow.last_token().unwrap();
        let texts: Vec<String> = last
            .succeeding_trivia()
            .map(|t| t.text().to_string())
            .collect();

        assert_eq!(
            texts,
            [
                "# Here is a comment that should be collected.",
                "\n\n",
                "# This comment should be included too.",
            ]
        );
    }

    #[test]
    fn inline_comment() {
        let workflow = parse_trailing_workflow(
            "version 1.2

# This comment should not be included
task foo {}

# This should not be collected.
workflow foo {} # Here is a comment that should be collected.

# This comment should not be included either.",
        );

        let last = workflow.last_token().unwrap();
        let comment = last.inline_comment().unwrap();
        assert_eq!(
            comment.text(),
            "# Here is a comment that should be collected."
        );
    }
}