wdl_ast/
lib.rs

1//! An abstract syntax tree for Workflow Description Language (WDL) documents.
2//!
3//! The AST implementation is effectively a facade over the concrete syntax tree
4//! (CST) implemented by [SyntaxTree] from `wdl-grammar`.
5//!
6//! An AST is cheap to construct and may be cheaply cloned at any level.
7//!
8//! However, an AST (and the underlying CST) are immutable; updating the tree
9//! requires replacing a node in the tree to produce a new tree. The unaffected
10//! nodes of the replacement are reused from the old tree to the new tree.
11//!
12//! # Examples
13//!
14//! An example of parsing a WDL document into an AST and validating it:
15//!
16//! ```rust
17//! # let source = "version 1.1\nworkflow test {}";
18//! use wdl_ast::Document;
19//! use wdl_ast::Validator;
20//!
21//! let (document, diagnostics) = Document::parse(source);
22//! if !diagnostics.is_empty() {
23//!     // Handle the failure to parse
24//! }
25//!
26//! let mut validator = Validator::default();
27//! if let Err(diagnostics) = validator.validate(&document) {
28//!     // Handle the failure to validate
29//! }
30//! ```
31
32#![warn(missing_docs)]
33#![warn(rust_2018_idioms)]
34#![warn(rust_2021_compatibility)]
35#![warn(missing_debug_implementations)]
36#![warn(clippy::missing_docs_in_private_items)]
37#![warn(rustdoc::broken_intra_doc_links)]
38
39use std::collections::HashSet;
40use std::fmt;
41
42pub use rowan::Direction;
43use rowan::NodeOrToken;
44use v1::CloseBrace;
45use v1::CloseHeredoc;
46use v1::OpenBrace;
47use v1::OpenHeredoc;
48pub use wdl_grammar::Diagnostic;
49pub use wdl_grammar::Label;
50pub use wdl_grammar::Severity;
51pub use wdl_grammar::Span;
52pub use wdl_grammar::SupportedVersion;
53pub use wdl_grammar::SyntaxElement;
54pub use wdl_grammar::SyntaxExt;
55pub use wdl_grammar::SyntaxKind;
56pub use wdl_grammar::SyntaxNode;
57pub use wdl_grammar::SyntaxToken;
58pub use wdl_grammar::SyntaxTokenExt;
59pub use wdl_grammar::SyntaxTree;
60pub use wdl_grammar::WorkflowDescriptionLanguage;
61pub use wdl_grammar::version;
62
63pub mod v1;
64
65mod element;
66mod validation;
67mod visitor;
68
69pub use element::*;
70pub use validation::*;
71pub use visitor::*;
72
/// A trait that abstracts the underlying representation of a syntax tree node.
///
/// The default node type is `SyntaxNode` for all AST nodes.
///
/// Implementors must also be cloneable, debuggable, comparable for equality,
/// and hashable, as required by the supertrait bounds.
pub trait TreeNode: Clone + fmt::Debug + PartialEq + Eq + std::hash::Hash {
    /// The associated token type for the tree node.
    type Token: TreeToken;

    /// Gets the parent node of the node.
    ///
    /// Returns `None` if the node is a root.
    fn parent(&self) -> Option<Self>;

    /// Gets the syntax kind of the node.
    fn kind(&self) -> SyntaxKind;

    /// Gets the text of the node.
    ///
    /// Node text is not contiguous, so the returned value implements `Display`
    /// rather than being a string slice.
    fn text(&self) -> impl fmt::Display;

    /// Gets the span of the node.
    fn span(&self) -> Span;

    /// Gets the children nodes of the node.
    ///
    /// Tokens are not included; see `children_with_tokens` for an iterator
    /// that yields both nodes and tokens.
    fn children(&self) -> impl Iterator<Item = Self>;

    /// Gets all the children of the node, including tokens.
    fn children_with_tokens(&self) -> impl Iterator<Item = NodeOrToken<Self, Self::Token>>;

    /// Gets the last token of the node.
    fn last_token(&self) -> Option<Self::Token>;

    /// Gets the node descendants of the node.
    fn descendants(&self) -> impl Iterator<Item = Self>;

    /// Gets the ancestors of the node.
    fn ancestors(&self) -> impl Iterator<Item = Self>;

    /// Determines if a given rule id is excepted for the node.
    ///
    /// Returns `true` if the rule with the given identifier is excepted.
    fn is_rule_excepted(&self, id: &str) -> bool;
}
114
/// A trait that abstracts the underlying representation of a syntax token.
///
/// Implementors must also be cloneable, debuggable, comparable for equality,
/// and hashable, as required by the supertrait bounds.
pub trait TreeToken: Clone + fmt::Debug + PartialEq + Eq + std::hash::Hash {
    /// The associated node type for the token.
    type Node: TreeNode;

    /// Gets the parent node of the token.
    ///
    /// Unlike `TreeNode::parent`, this is not optional: the signature requires
    /// every token to have a parent node.
    fn parent(&self) -> Self::Node;

    /// Gets the syntax kind for the token.
    fn kind(&self) -> SyntaxKind;

    /// Gets the text of the token.
    fn text(&self) -> &str;

    /// Gets the span of the token.
    fn span(&self) -> Span;
}
132
133/// A trait implemented by AST nodes.
134pub trait AstNode<N: TreeNode>: Sized {
135    /// Determines if the kind can be cast to this representation.
136    fn can_cast(kind: SyntaxKind) -> bool;
137
138    /// Casts the given inner type to the this representation.
139    fn cast(inner: N) -> Option<Self>;
140
141    /// Gets the inner type from this representation.
142    fn inner(&self) -> &N;
143
144    /// Gets the syntax kind of the node.
145    fn kind(&self) -> SyntaxKind {
146        self.inner().kind()
147    }
148
149    /// Gets the text of the node.
150    ///
151    /// As node text is not contiguous, this returns a type that implements
152    /// `Display`.
153    fn text<'a>(&'a self) -> impl fmt::Display
154    where
155        N: 'a,
156    {
157        self.inner().text()
158    }
159
160    /// Gets the span of the node.
161    fn span(&self) -> Span {
162        self.inner().span()
163    }
164
165    /// Gets the first token child that can cast to an expected type.
166    fn token<C>(&self) -> Option<C>
167    where
168        C: AstToken<N::Token>,
169    {
170        self.inner()
171            .children_with_tokens()
172            .filter_map(|e| e.into_token())
173            .find_map(|t| C::cast(t))
174    }
175
176    /// Gets all the token children that can cast to an expected type.
177    fn tokens<'a, C>(&'a self) -> impl Iterator<Item = C>
178    where
179        C: AstToken<N::Token>,
180        N: 'a,
181    {
182        self.inner()
183            .children_with_tokens()
184            .filter_map(|e| e.into_token().and_then(C::cast))
185    }
186
187    /// Gets the last token of the node and attempts to cast it to an expected
188    /// type.
189    ///
190    /// Returns `None` if there is no last token or if it cannot be casted to
191    /// the expected type.
192    fn last_token<C>(&self) -> Option<C>
193    where
194        C: AstToken<N::Token>,
195    {
196        self.inner().last_token().and_then(C::cast)
197    }
198
199    /// Gets the first node child that can cast to an expected type.
200    fn child<C>(&self) -> Option<C>
201    where
202        C: AstNode<N>,
203    {
204        self.inner().children().find_map(C::cast)
205    }
206
207    /// Gets all node children that can cast to an expected type.
208    fn children<'a, C>(&'a self) -> impl Iterator<Item = C>
209    where
210        C: AstNode<N>,
211        N: 'a,
212    {
213        self.inner().children().filter_map(C::cast)
214    }
215
216    /// Gets the parent of the node if the underlying tree node has a parent.
217    ///
218    /// Returns `None` if the node has no parent or if the parent node is not of
219    /// the expected type.
220    fn parent<'a, P>(&self) -> Option<P>
221    where
222        P: AstNode<N>,
223        N: 'a,
224    {
225        P::cast(self.inner().parent()?)
226    }
227
228    /// Calculates the span of a scope given the node where the scope is
229    /// visible.
230    ///
231    /// Returns `None` if the node does not contain the open and close tokens as
232    /// children.
233    fn scope_span<O, C>(&self) -> Option<Span>
234    where
235        O: AstToken<N::Token>,
236        C: AstToken<N::Token>,
237    {
238        let open = self.token::<O>()?.span();
239        let close = self.last_token::<C>()?.span();
240
241        // The span starts after the opening brace and before the closing brace
242        Some(Span::new(open.end(), close.start() - open.end()))
243    }
244
245    /// Gets the interior span of child opening and closing brace tokens for the
246    /// node.
247    ///
248    /// The span starts from immediately after the opening brace token and ends
249    /// immediately before the closing brace token.
250    ///
251    /// Returns `None` if the node does not contain child brace tokens.
252    fn braced_scope_span(&self) -> Option<Span> {
253        self.scope_span::<OpenBrace<N::Token>, CloseBrace<N::Token>>()
254    }
255
256    /// Gets the interior span of child opening and closing heredoc tokens for
257    /// the node.
258    ///
259    /// The span starts from immediately after the opening heredoc token and
260    /// ends immediately before the closing heredoc token.
261    ///
262    /// Returns `None` if the node does not contain child heredoc tokens.
263    fn heredoc_scope_span(&self) -> Option<Span> {
264        self.scope_span::<OpenHeredoc<N::Token>, CloseHeredoc<N::Token>>()
265    }
266
267    /// Gets the node descendants (including self) from this node that can be
268    /// cast to the expected type.
269    fn descendants<'a, D>(&'a self) -> impl Iterator<Item = D>
270    where
271        D: AstNode<N>,
272        N: 'a,
273    {
274        self.inner().descendants().filter_map(|d| D::cast(d))
275    }
276}
277
278/// A trait implemented by AST tokens.
279pub trait AstToken<T: TreeToken>: Sized {
280    /// Determines if the kind can be cast to this representation.
281    fn can_cast(kind: SyntaxKind) -> bool;
282
283    /// Casts the given inner type to the this representation.
284    fn cast(inner: T) -> Option<Self>;
285
286    /// Gets the inner type from this representation.
287    fn inner(&self) -> &T;
288
289    /// Gets the syntax kind of the token.
290    fn kind(&self) -> SyntaxKind {
291        self.inner().kind()
292    }
293
294    /// Gets the text of the token.
295    fn text<'a>(&'a self) -> &'a str
296    where
297        T: 'a,
298    {
299        self.inner().text()
300    }
301
302    /// Gets the span of the token.
303    fn span(&self) -> Span {
304        self.inner().span()
305    }
306
307    /// Gets the parent of the token.
308    ///
309    /// Returns `None` if the parent node cannot be cast to the expected type.
310    fn parent<'a, P>(&self) -> Option<P>
311    where
312        P: AstNode<T::Node>,
313        T: 'a,
314    {
315        P::cast(self.inner().parent())
316    }
317}
318
/// Implemented by nodes that can create a new root from a different tree node
/// type.
///
/// This is the bound used by `Document::morph` to convert a document from one
/// tree node type to another.
pub trait NewRoot<N: TreeNode>: Sized {
    /// Constructs a new root node from the given root node of a different tree
    /// node type.
    fn new_root(root: N) -> Self;
}
326
327impl TreeNode for SyntaxNode {
328    type Token = SyntaxToken;
329
330    fn parent(&self) -> Option<SyntaxNode> {
331        self.parent()
332    }
333
334    fn kind(&self) -> SyntaxKind {
335        self.kind()
336    }
337
338    fn children(&self) -> impl Iterator<Item = Self> {
339        self.children()
340    }
341
342    fn children_with_tokens(&self) -> impl Iterator<Item = NodeOrToken<Self, Self::Token>> {
343        self.children_with_tokens()
344    }
345
346    fn text(&self) -> impl fmt::Display {
347        self.text()
348    }
349
350    fn span(&self) -> Span {
351        let range = self.text_range();
352        let start = usize::from(range.start());
353        Span::new(start, usize::from(range.end()) - start)
354    }
355
356    fn last_token(&self) -> Option<Self::Token> {
357        self.last_token()
358    }
359
360    fn descendants(&self) -> impl Iterator<Item = Self> {
361        self.descendants()
362    }
363
364    fn ancestors(&self) -> impl Iterator<Item = Self> {
365        self.ancestors()
366    }
367
368    fn is_rule_excepted(&self, id: &str) -> bool {
369        <Self as SyntaxNodeExt>::is_rule_excepted(self, id)
370    }
371}
372
373impl TreeToken for SyntaxToken {
374    type Node = SyntaxNode;
375
376    fn parent(&self) -> SyntaxNode {
377        self.parent().expect("token should have a parent")
378    }
379
380    fn kind(&self) -> SyntaxKind {
381        self.kind()
382    }
383
384    fn text(&self) -> &str {
385        self.text()
386    }
387
388    fn span(&self) -> Span {
389        let range = self.text_range();
390        let start = usize::from(range.start());
391        Span::new(start, usize::from(range.end()) - start)
392    }
393}
394
/// Represents the reason an AST node has been visited.
///
/// Each node is visited exactly once, but the visitor will receive
/// a call for entering the node and a call for exiting the node.
///
/// The derived ordering places `Enter` before `Exit`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum VisitReason {
    /// The visit has entered the node.
    Enter,
    /// The visit has exited the node.
    Exit,
}
406
/// An extension trait for syntax nodes.
///
/// Provides access to the rule exceptions declared in comments that precede
/// a syntax node.
pub trait SyntaxNodeExt {
    /// Gets an iterator over the `@except` comments for a syntax node.
    ///
    /// The implementation for `SyntaxNode` yields every comment token in the
    /// run of whitespace and comments directly preceding the node; callers
    /// are expected to check each comment for the exception prefix.
    fn except_comments(&self) -> impl Iterator<Item = SyntaxToken> + '_;

    /// Gets the AST node's rule exceptions set.
    ///
    /// The set is the comma-delimited list of rule identifiers that follows a
    /// `#@ except:` comment.
    fn rule_exceptions(&self) -> HashSet<String>;

    /// Determines if a given rule id is excepted for the syntax node.
    ///
    /// Returns `true` if the rule with the given identifier is excepted.
    fn is_rule_excepted(&self, id: &str) -> bool;
}
421
422impl SyntaxNodeExt for SyntaxNode {
423    fn except_comments(&self) -> impl Iterator<Item = SyntaxToken> + '_ {
424        self.siblings_with_tokens(Direction::Prev)
425            .skip(1)
426            .map_while(|s| {
427                if s.kind() == SyntaxKind::Whitespace || s.kind() == SyntaxKind::Comment {
428                    s.into_token()
429                } else {
430                    None
431                }
432            })
433            .filter(|t| t.kind() == SyntaxKind::Comment)
434    }
435
436    fn rule_exceptions(&self) -> HashSet<String> {
437        let mut set = HashSet::default();
438        for comment in self.except_comments() {
439            if let Some(ids) = comment.text().strip_prefix(EXCEPT_COMMENT_PREFIX) {
440                for id in ids.split(',') {
441                    let id = id.trim();
442                    set.insert(id.to_string());
443                }
444            }
445        }
446
447        set
448    }
449
450    fn is_rule_excepted(&self, id: &str) -> bool {
451        for comment in self.except_comments() {
452            if let Some(ids) = comment.text().strip_prefix(EXCEPT_COMMENT_PREFIX) {
453                if ids.split(',').any(|i| i.trim() == id) {
454                    return true;
455                }
456            }
457        }
458
459        false
460    }
461}
462
/// Represents the AST of a [Document].
///
/// The variant is selected by the version of the document; an AST with the
/// `Unsupported` variant carries no tree.
///
/// See [Document::ast].
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Ast<N: TreeNode = SyntaxNode> {
    /// The WDL document specifies an unsupported version.
    Unsupported,
    /// The WDL document is V1.
    V1(v1::Ast<N>),
}
473
474impl<N: TreeNode> Ast<N> {
475    /// Gets the AST as a V1 AST.
476    ///
477    /// Returns `None` if the AST is not a V1 AST.
478    pub fn as_v1(&self) -> Option<&v1::Ast<N>> {
479        match self {
480            Self::V1(ast) => Some(ast),
481            _ => None,
482        }
483    }
484
485    /// Consumes `self` and attempts to return the V1 AST.
486    pub fn into_v1(self) -> Option<v1::Ast<N>> {
487        match self {
488            Self::V1(ast) => Some(ast),
489            _ => None,
490        }
491    }
492
493    /// Consumes `self` and attempts to return the V1 AST.
494    ///
495    /// # Panics
496    ///
497    /// Panics if the AST is not a V1 AST.
498    pub fn unwrap_v1(self) -> v1::Ast<N> {
499        self.into_v1().expect("the AST is not a V1 AST")
500    }
501}
502
503/// Represents a single WDL document.
504///
505/// See [Document::ast] for getting a version-specific Abstract
506/// Syntax Tree.
507#[derive(Clone, PartialEq, Eq, Hash)]
508pub struct Document<N: TreeNode = SyntaxNode>(N);
509
510impl<N: TreeNode> AstNode<N> for Document<N> {
511    fn can_cast(kind: SyntaxKind) -> bool {
512        kind == SyntaxKind::RootNode
513    }
514
515    fn cast(inner: N) -> Option<Self> {
516        if Self::can_cast(inner.kind()) {
517            Some(Self(inner))
518        } else {
519            None
520        }
521    }
522
523    fn inner(&self) -> &N {
524        &self.0
525    }
526}
527
528impl Document {
529    /// Parses a document from the given source.
530    ///
531    /// A document and its AST elements are trivially cloned.
532    ///
533    /// # Example
534    ///
535    /// ```rust
536    /// # use wdl_ast::{Document, AstToken, Ast};
537    /// let (document, diagnostics) = Document::parse("version 1.1");
538    /// assert!(diagnostics.is_empty());
539    ///
540    /// assert_eq!(
541    ///     document
542    ///         .version_statement()
543    ///         .expect("should have version statement")
544    ///         .version()
545    ///         .text(),
546    ///     "1.1"
547    /// );
548    ///
549    /// match document.ast() {
550    ///     Ast::V1(ast) => {
551    ///         assert_eq!(ast.items().count(), 0);
552    ///     }
553    ///     Ast::Unsupported => panic!("should be a V1 AST"),
554    /// }
555    /// ```
556    pub fn parse(source: &str) -> (Self, Vec<Diagnostic>) {
557        let (tree, diagnostics) = SyntaxTree::parse(source);
558        (
559            Document::cast(tree.into_syntax()).expect("document should cast"),
560            diagnostics,
561        )
562    }
563}
564
565impl<N: TreeNode> Document<N> {
566    /// Gets the version statement of the document.
567    ///
568    /// This can be used to determine the version of the document that was
569    /// parsed.
570    ///
571    /// A return value of `None` signifies a missing version statement.
572    pub fn version_statement(&self) -> Option<VersionStatement<N>> {
573        self.child()
574    }
575
576    /// Gets the AST representation of the document.
577    pub fn ast(&self) -> Ast<N> {
578        self.version_statement()
579            .as_ref()
580            .and_then(|s| s.version().text().parse::<SupportedVersion>().ok())
581            .map(|v| match v {
582                SupportedVersion::V1(_) => Ast::V1(v1::Ast(self.0.clone())),
583                _ => Ast::Unsupported,
584            })
585            .unwrap_or(Ast::Unsupported)
586    }
587
588    /// Morphs a document of one node type to a document of a different node
589    /// type.
590    pub fn morph<U: TreeNode + NewRoot<N>>(self) -> Document<U> {
591        Document(U::new_root(self.0))
592    }
593}
594
595impl Document<SyntaxNode> {
596    /// Visits the document with a pre-order traversal using the provided
597    /// visitor to visit each element in the document.
598    pub fn visit<V: Visitor>(&self, state: &mut V::State, visitor: &mut V) {
599        visit(&self.0, state, visitor)
600    }
601}
602
603impl fmt::Debug for Document {
604    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
605        self.0.fmt(f)
606    }
607}
608
609/// Represents a whitespace token in the AST.
610#[derive(Clone, Debug, PartialEq, Eq, Hash)]
611pub struct Whitespace<T: TreeToken = SyntaxToken>(T);
612
613impl<T: TreeToken> AstToken<T> for Whitespace<T> {
614    fn can_cast(kind: SyntaxKind) -> bool {
615        kind == SyntaxKind::Whitespace
616    }
617
618    fn cast(inner: T) -> Option<Self> {
619        match inner.kind() {
620            SyntaxKind::Whitespace => Some(Self(inner)),
621            _ => None,
622        }
623    }
624
625    fn inner(&self) -> &T {
626        &self.0
627    }
628}
629
630/// Represents a comment token in the AST.
631#[derive(Debug, Clone, PartialEq, Eq, Hash)]
632pub struct Comment<T: TreeToken = SyntaxToken>(T);
633
634impl<T: TreeToken> AstToken<T> for Comment<T> {
635    fn can_cast(kind: SyntaxKind) -> bool {
636        kind == SyntaxKind::Comment
637    }
638
639    fn cast(inner: T) -> Option<Self> {
640        match inner.kind() {
641            SyntaxKind::Comment => Some(Self(inner)),
642            _ => None,
643        }
644    }
645
646    fn inner(&self) -> &T {
647        &self.0
648    }
649}
650
651/// Represents a version statement in a WDL AST.
652#[derive(Debug, Clone, PartialEq, Eq, Hash)]
653pub struct VersionStatement<N: TreeNode = SyntaxNode>(N);
654
655impl<N: TreeNode> VersionStatement<N> {
656    /// Gets the version of the version statement.
657    pub fn version(&self) -> Version<N::Token> {
658        self.token()
659            .expect("version statement must have a version token")
660    }
661
662    /// Gets the version keyword of the version statement.
663    pub fn keyword(&self) -> v1::VersionKeyword<N::Token> {
664        self.token()
665            .expect("version statement must have a version keyword")
666    }
667}
668
669impl<N: TreeNode> AstNode<N> for VersionStatement<N> {
670    fn can_cast(kind: SyntaxKind) -> bool {
671        kind == SyntaxKind::VersionStatementNode
672    }
673
674    fn cast(inner: N) -> Option<Self> {
675        match inner.kind() {
676            SyntaxKind::VersionStatementNode => Some(Self(inner)),
677            _ => None,
678        }
679    }
680
681    fn inner(&self) -> &N {
682        &self.0
683    }
684}
685
686/// Represents a version in the AST.
687#[derive(Clone, Debug, PartialEq, Eq, Hash)]
688pub struct Version<T: TreeToken = SyntaxToken>(T);
689
690impl<T: TreeToken> AstToken<T> for Version<T> {
691    fn can_cast(kind: SyntaxKind) -> bool {
692        kind == SyntaxKind::Version
693    }
694
695    fn cast(inner: T) -> Option<Self> {
696        match inner.kind() {
697            SyntaxKind::Version => Some(Self(inner)),
698            _ => None,
699        }
700    }
701
702    fn inner(&self) -> &T {
703        &self.0
704    }
705}
706
707/// Represents an identifier token.
708#[derive(Debug, Clone, PartialEq, Eq, Hash)]
709pub struct Ident<T: TreeToken = SyntaxToken>(T);
710
711impl<T: TreeToken> Ident<T> {
712    /// Gets a hashable representation of the identifier.
713    pub fn hashable(&self) -> TokenText<T> {
714        TokenText(self.0.clone())
715    }
716}
717
718impl<T: TreeToken> AstToken<T> for Ident<T> {
719    fn can_cast(kind: SyntaxKind) -> bool {
720        kind == SyntaxKind::Ident
721    }
722
723    fn cast(inner: T) -> Option<Self> {
724        match inner.kind() {
725            SyntaxKind::Ident => Some(Self(inner)),
726            _ => None,
727        }
728    }
729
730    fn inner(&self) -> &T {
731        &self.0
732    }
733}
734
735/// Helper for hashing tokens by their text.
736///
737/// Normally a token's equality and hash implementation work by comparing
738/// the token's element in the tree; thus, two tokens with the same text
739/// but different positions in the tree will compare and hash differently.
740///
741/// With this hash implementation, two tokens compare and hash identically if
742/// their text is identical.
743#[derive(Debug, Clone)]
744pub struct TokenText<T: TreeToken = SyntaxToken>(T);
745
746impl TokenText {
747    /// Gets the text of the underlying token.
748    pub fn text(&self) -> &str {
749        self.0.text()
750    }
751
752    /// Gets the span of the underlying token.
753    pub fn span(&self) -> Span {
754        self.0.span()
755    }
756}
757
758impl<T: TreeToken> PartialEq for TokenText<T> {
759    fn eq(&self, other: &Self) -> bool {
760        self.0.text() == other.0.text()
761    }
762}
763
764impl<T: TreeToken> Eq for TokenText<T> {}
765
766impl<T: TreeToken> std::hash::Hash for TokenText<T> {
767    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
768        self.0.text().hash(state);
769    }
770}
771
772impl<T: TreeToken> std::borrow::Borrow<str> for TokenText<T> {
773    fn borrow(&self) -> &str {
774        self.0.text()
775    }
776}