wdl_analysis/
document.rs

1//! Representation of analyzed WDL documents.
2
3use std::borrow::Cow;
4use std::collections::HashMap;
5use std::path::Path;
6use std::sync::Arc;
7
8use indexmap::IndexMap;
9use petgraph::graph::NodeIndex;
10use rowan::GreenNode;
11use url::Url;
12use uuid::Uuid;
13use wdl_ast::Ast;
14use wdl_ast::AstNode;
15use wdl_ast::Diagnostic;
16use wdl_ast::Severity;
17use wdl_ast::Span;
18use wdl_ast::SupportedVersion;
19use wdl_ast::SyntaxNode;
20
21use crate::config::Config;
22use crate::diagnostics::unused_import;
23use crate::graph::DocumentGraph;
24use crate::graph::ParseState;
25use crate::types::CallType;
26use crate::types::Type;
27
28mod v1;
29
/// The name of the implicit `task` variable.
///
/// In WDL 1.2 this variable is available in task command sections and task
/// outputs.
pub const TASK_VAR_NAME: &str = "task";
33
34/// Represents a namespace introduced by an import.
35#[derive(Debug)]
36pub struct Namespace {
37    /// The span of the import that introduced the namespace.
38    span: Span,
39    /// The URI of the imported document that introduced the namespace.
40    source: Arc<Url>,
41    /// The namespace's document.
42    document: Document,
43    /// Whether or not the namespace is used (i.e. referenced) in the document.
44    used: bool,
45    /// Whether or not the namespace is excepted from the "unused import"
46    /// diagnostic.
47    excepted: bool,
48}
49
50impl Namespace {
51    /// Gets the span of the import that introduced the namespace.
52    pub fn span(&self) -> Span {
53        self.span
54    }
55
56    /// Gets the URI of the imported document that introduced the namespace.
57    pub fn source(&self) -> &Arc<Url> {
58        &self.source
59    }
60
61    /// Gets the imported document.
62    pub fn document(&self) -> &Document {
63        &self.document
64    }
65}
66
67/// Represents a struct in a document.
68#[derive(Debug, Clone)]
69pub struct Struct {
70    /// The name of the struct.
71    name: String,
72    /// The span that introduced the struct.
73    ///
74    /// This is either the name of a struct definition (local) or an import's
75    /// URI or alias (imported).
76    name_span: Span,
77    /// The offset of the CST node from the start of the document.
78    ///
79    /// This is used to adjust diagnostics resulting from traversing the struct
80    /// node as if it were the root of the CST.
81    offset: usize,
82    /// Stores the CST node of the struct.
83    ///
84    /// This is used to calculate type equivalence for imports.
85    node: rowan::GreenNode,
86    /// The namespace that defines the struct.
87    ///
88    /// This is `Some` only for imported structs.
89    namespace: Option<String>,
90    /// The type of the struct.
91    ///
92    /// Initially this is `None` until a type check occurs.
93    ty: Option<Type>,
94}
95
96impl Struct {
97    /// Gets the name of the struct.
98    pub fn name(&self) -> &str {
99        &self.name
100    }
101
102    /// Gets the span of the name.
103    pub fn name_span(&self) -> Span {
104        self.name_span
105    }
106
107    /// Gets the offset of the struct
108    pub fn offset(&self) -> usize {
109        self.offset
110    }
111
112    /// Gets the node of the struct.
113    pub fn node(&self) -> &rowan::GreenNode {
114        &self.node
115    }
116
117    /// Gets the namespace that defines this struct.
118    ///
119    /// Returns `None` for structs defined in the containing document or `Some`
120    /// for a struct introduced by an import.
121    pub fn namespace(&self) -> Option<&str> {
122        self.namespace.as_deref()
123    }
124
125    /// Gets the type of the struct.
126    ///
127    /// A value of `None` indicates that the type could not be determined for
128    /// the struct; this may happen if the struct definition is recursive.
129    pub fn ty(&self) -> Option<&Type> {
130        self.ty.as_ref()
131    }
132}
133
134/// Represents information about a name in a scope.
135#[derive(Debug, Clone)]
136pub struct Name {
137    /// The span of the name.
138    span: Span,
139    /// The type of the name.
140    ty: Type,
141}
142
143impl Name {
144    /// Gets the span of the name.
145    pub fn span(&self) -> Span {
146        self.span
147    }
148
149    /// Gets the type of the name.
150    pub fn ty(&self) -> &Type {
151        &self.ty
152    }
153}
154
/// The position of a scope within a collection of scopes.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct ScopeIndex(usize);
158
159/// Represents a scope in a WDL document.
160#[derive(Debug)]
161struct Scope {
162    /// The index of the parent scope.
163    ///
164    /// This is `None` for task and workflow scopes.
165    parent: Option<ScopeIndex>,
166    /// The span in the document where the names of the scope are visible.
167    span: Span,
168    /// The map of names in scope to their span and types.
169    names: IndexMap<String, Name>,
170}
171
172impl Scope {
173    /// Creates a new scope given the parent scope and span.
174    fn new(parent: Option<ScopeIndex>, span: Span) -> Self {
175        Self {
176            parent,
177            span,
178            names: Default::default(),
179        }
180    }
181
182    /// Inserts a name into the scope.
183    pub fn insert(&mut self, name: impl Into<String>, span: Span, ty: Type) {
184        self.names.insert(name.into(), Name { span, ty });
185    }
186}
187
188/// Represents a reference to a scope.
189#[derive(Debug, Clone, Copy)]
190pub struct ScopeRef<'a> {
191    /// The reference to the scopes collection.
192    scopes: &'a [Scope],
193    /// The index of the scope in the collection.
194    index: ScopeIndex,
195}
196
197impl<'a> ScopeRef<'a> {
198    /// Creates a new scope reference given the scope index.
199    fn new(scopes: &'a [Scope], index: ScopeIndex) -> Self {
200        Self { scopes, index }
201    }
202
203    /// Gets the span of the scope.
204    pub fn span(&self) -> Span {
205        self.scopes[self.index.0].span
206    }
207
208    /// Gets the parent scope.
209    ///
210    /// Returns `None` if there is no parent scope.
211    pub fn parent(&self) -> Option<Self> {
212        self.scopes[self.index.0].parent.map(|p| Self {
213            scopes: self.scopes,
214            index: p,
215        })
216    }
217
218    /// Gets all of the names available at this scope.
219    pub fn names(&self) -> impl Iterator<Item = (&str, &Name)> + use<'_> {
220        self.scopes[self.index.0]
221            .names
222            .iter()
223            .map(|(name, n)| (name.as_str(), n))
224    }
225
226    /// Gets a name local to this scope.
227    ///
228    /// Returns `None` if a name local to this scope was not found.
229    pub fn local(&self, name: &str) -> Option<&Name> {
230        self.scopes[self.index.0].names.get(name)
231    }
232
233    /// Lookups a name in the scope.
234    ///
235    /// Returns `None` if the name is not available in the scope.
236    pub fn lookup(&self, name: &str) -> Option<&Name> {
237        let mut current = Some(self.index);
238
239        while let Some(index) = current {
240            if let Some(name) = self.scopes[index.0].names.get(name) {
241                return Some(name);
242            }
243
244            current = self.scopes[index.0].parent;
245        }
246
247        None
248    }
249}
250
251/// Represents a mutable reference to a scope.
252#[derive(Debug)]
253struct ScopeRefMut<'a> {
254    /// The reference to all scopes.
255    scopes: &'a mut [Scope],
256    /// The index to the scope.
257    index: ScopeIndex,
258}
259
260impl<'a> ScopeRefMut<'a> {
261    /// Creates a new mutable scope reference given the scope index.
262    fn new(scopes: &'a mut [Scope], index: ScopeIndex) -> Self {
263        Self { scopes, index }
264    }
265
266    /// Lookups a name in the scope.
267    ///
268    /// Returns `None` if the name is not available in the scope.
269    pub fn lookup(&self, name: &str) -> Option<&Name> {
270        let mut current = Some(self.index);
271
272        while let Some(index) = current {
273            if let Some(name) = self.scopes[index.0].names.get(name) {
274                return Some(name);
275            }
276
277            current = self.scopes[index.0].parent;
278        }
279
280        None
281    }
282
283    /// Inserts a name into the scope.
284    pub fn insert(&mut self, name: impl Into<String>, span: Span, ty: Type) {
285        self.scopes[self.index.0]
286            .names
287            .insert(name.into(), Name { span, ty });
288    }
289
290    /// Converts the mutable scope reference to an immutable scope reference.
291    pub fn as_scope_ref(&'a self) -> ScopeRef<'a> {
292        ScopeRef {
293            scopes: self.scopes,
294            index: self.index,
295        }
296    }
297}
298
299/// Represents a task or workflow input.
300#[derive(Debug, Clone, PartialEq, Eq)]
301pub struct Input {
302    /// The type of the input.
303    ty: Type,
304    /// Whether or not the input is required.
305    required: bool,
306}
307
308impl Input {
309    /// Gets the type of the input.
310    pub fn ty(&self) -> &Type {
311        &self.ty
312    }
313
314    /// Whether or not the input is required.
315    pub fn required(&self) -> bool {
316        self.required
317    }
318}
319
320/// Represents a task or workflow output.
321#[derive(Debug, Clone, PartialEq, Eq)]
322pub struct Output {
323    /// The type of the output.
324    ty: Type,
325    /// The span of the output name.
326    name_span: Span,
327}
328
329impl Output {
330    /// Creates a new output with the given type.
331    pub(crate) fn new(ty: Type, name_span: Span) -> Self {
332        Self { ty, name_span }
333    }
334
335    /// Gets the type of the output.
336    pub fn ty(&self) -> &Type {
337        &self.ty
338    }
339
340    /// Gets the span of output's name.
341    pub fn name_span(&self) -> Span {
342        self.name_span
343    }
344}
345
346/// Represents a task in a document.
347#[derive(Debug)]
348pub struct Task {
349    /// The span of the task name.
350    name_span: Span,
351    /// The name of the task.
352    name: String,
353    /// The scopes contained in the task.
354    ///
355    /// The first scope will always be the task's scope.
356    ///
357    /// The scopes will be in sorted order by span start.
358    scopes: Vec<Scope>,
359    /// The inputs of the task.
360    inputs: Arc<IndexMap<String, Input>>,
361    /// The outputs of the task.
362    outputs: Arc<IndexMap<String, Output>>,
363}
364
365impl Task {
366    /// Gets the name of the task.
367    pub fn name(&self) -> &str {
368        &self.name
369    }
370
371    /// Gets the span of the name.
372    pub fn name_span(&self) -> Span {
373        self.name_span
374    }
375
376    /// Gets the scope of the task.
377    pub fn scope(&self) -> ScopeRef<'_> {
378        ScopeRef::new(&self.scopes, ScopeIndex(0))
379    }
380
381    /// Gets the inputs of the task.
382    pub fn inputs(&self) -> &IndexMap<String, Input> {
383        &self.inputs
384    }
385
386    /// Gets the outputs of the task.
387    pub fn outputs(&self) -> &IndexMap<String, Output> {
388        &self.outputs
389    }
390}
391
392/// Represents a workflow in a document.
393#[derive(Debug)]
394pub struct Workflow {
395    /// The span of the workflow name.
396    name_span: Span,
397    /// The name of the workflow.
398    name: String,
399    /// The scopes contained in the workflow.
400    ///
401    /// The first scope will always be the workflow's scope.
402    ///
403    /// The scopes will be in sorted order by span start.
404    scopes: Vec<Scope>,
405    /// The inputs of the workflow.
406    inputs: Arc<IndexMap<String, Input>>,
407    /// The outputs of the workflow.
408    outputs: Arc<IndexMap<String, Output>>,
409    /// The calls made by the workflow.
410    calls: HashMap<String, CallType>,
411    /// Whether or not nested inputs are allowed for the workflow.
412    allows_nested_inputs: bool,
413}
414
415impl Workflow {
416    /// Gets the name of the workflow.
417    pub fn name(&self) -> &str {
418        &self.name
419    }
420
421    /// Gets the span of the name.
422    pub fn name_span(&self) -> Span {
423        self.name_span
424    }
425
426    /// Gets the scope of the workflow.
427    pub fn scope(&self) -> ScopeRef<'_> {
428        ScopeRef::new(&self.scopes, ScopeIndex(0))
429    }
430
431    /// Gets the inputs of the workflow.
432    pub fn inputs(&self) -> &IndexMap<String, Input> {
433        &self.inputs
434    }
435
436    /// Gets the outputs of the workflow.
437    pub fn outputs(&self) -> &IndexMap<String, Output> {
438        &self.outputs
439    }
440
441    /// Gets the calls made by the workflow.
442    pub fn calls(&self) -> &HashMap<String, CallType> {
443        &self.calls
444    }
445
446    /// Determines if the workflow allows nested inputs.
447    pub fn allows_nested_inputs(&self) -> bool {
448        self.allows_nested_inputs
449    }
450}
451
452/// Represents analysis data about a WDL document.
453#[derive(Debug)]
454struct DocumentData {
455    /// The configuration under which this document was analyzed.
456    config: Config,
457    /// The root CST node of the document.
458    ///
459    /// This is `None` when the document could not be parsed.
460    root: Option<GreenNode>,
461    /// The document identifier.
462    ///
463    /// The identifier changes every time the document is analyzed.
464    id: Arc<String>,
465    /// The URI of the analyzed document.
466    uri: Arc<Url>,
467    /// The version of the document.
468    version: Option<SupportedVersion>,
469    /// The namespaces in the document.
470    namespaces: IndexMap<String, Namespace>,
471    /// The tasks in the document.
472    tasks: IndexMap<String, Task>,
473    /// The singular workflow in the document.
474    workflow: Option<Workflow>,
475    /// The structs in the document.
476    structs: IndexMap<String, Struct>,
477    /// The diagnostics for the document.
478    diagnostics: Vec<Diagnostic>,
479}
480
481impl DocumentData {
482    /// Constructs a new analysis document data.
483    fn new(
484        config: Config,
485        uri: Arc<Url>,
486        root: Option<GreenNode>,
487        version: Option<SupportedVersion>,
488        diagnostics: Vec<Diagnostic>,
489    ) -> Self {
490        Self {
491            config,
492            root,
493            id: Uuid::new_v4().to_string().into(),
494            uri,
495            version,
496            namespaces: Default::default(),
497            tasks: Default::default(),
498            workflow: Default::default(),
499            structs: Default::default(),
500            diagnostics,
501        }
502    }
503}
504
505/// Represents an analyzed WDL document.
506///
507/// This type is cheaply cloned.
508#[derive(Debug, Clone)]
509pub struct Document {
510    /// The document data for the document.
511    data: Arc<DocumentData>,
512}
513
514impl Document {
515    /// Creates a new default document from a URI.
516    pub(crate) fn default_from_uri(uri: Arc<Url>) -> Self {
517        Self {
518            data: Arc::new(DocumentData::new(
519                Default::default(),
520                uri,
521                None,
522                None,
523                Default::default(),
524            )),
525        }
526    }
527
528    /// Creates a new analyzed document from a document graph node.
529    pub(crate) fn from_graph_node(
530        config: &Config,
531        graph: &DocumentGraph,
532        index: NodeIndex,
533    ) -> Self {
534        let node = graph.get(index);
535
536        let (wdl_version, diagnostics) = match node.parse_state() {
537            ParseState::NotParsed => panic!("node should have been parsed"),
538            ParseState::Error(_) => return Self::default_from_uri(node.uri().clone()),
539            ParseState::Parsed {
540                wdl_version,
541                diagnostics,
542                ..
543            } => (wdl_version, diagnostics),
544        };
545
546        let root = node.root().expect("node should have been parsed");
547        let (config, wdl_version) = match (root.version_statement(), wdl_version) {
548            (Some(stmt), Some(wdl_version)) => (
549                config.with_diagnostics_config(
550                    config.diagnostics_config().excepted_for_node(stmt.inner()),
551                ),
552                *wdl_version,
553            ),
554            _ => {
555                // Don't process a document with a missing version statement or an unsupported
556                // version unless a fallback version is configured
557                return Self {
558                    data: Arc::new(DocumentData::new(
559                        config.clone(),
560                        node.uri().clone(),
561                        Some(root.inner().green().into()),
562                        None,
563                        diagnostics.to_vec(),
564                    )),
565                };
566            }
567        };
568
569        let mut data = DocumentData::new(
570            config.clone(),
571            node.uri().clone(),
572            Some(root.inner().green().into()),
573            Some(wdl_version),
574            diagnostics.to_vec(),
575        );
576        match root.ast_with_version_fallback(config.fallback_version()) {
577            Ast::Unsupported => {}
578            Ast::V1(ast) => v1::populate_document(&mut data, &config, graph, index, &ast),
579        }
580
581        // Check for unused imports
582        if let Some(severity) = config.diagnostics_config().unused_import {
583            let DocumentData {
584                namespaces,
585                diagnostics,
586                ..
587            } = &mut data;
588
589            diagnostics.extend(
590                namespaces
591                    .iter()
592                    .filter(|(_, ns)| !ns.used && !ns.excepted)
593                    .map(|(name, ns)| unused_import(name, ns.span()).with_severity(severity)),
594            );
595        }
596
597        Self {
598            data: Arc::new(data),
599        }
600    }
601
602    /// Gets the analysis configuration.
603    pub fn config(&self) -> &Config {
604        &self.data.config
605    }
606
607    /// Gets the root AST document node.
608    ///
609    /// # Panics
610    ///
611    /// Panics if the document was not parsed.
612    pub fn root(&self) -> wdl_ast::Document {
613        wdl_ast::Document::cast(SyntaxNode::new_root(
614            self.data.root.clone().expect("should have a root"),
615        ))
616        .expect("should cast")
617    }
618
619    /// Gets the identifier of the document.
620    ///
621    /// This value changes when a document is reanalyzed.
622    pub fn id(&self) -> &Arc<String> {
623        &self.data.id
624    }
625
626    /// Gets the URI of the document.
627    pub fn uri(&self) -> &Arc<Url> {
628        &self.data.uri
629    }
630
631    /// Gets the path to the document.
632    ///
633    /// If the scheme of the document's URI is not `file`, this will return the
634    /// URI as a string. Otherwise, this will attempt to return the path
635    /// relative to the current working directory, or the absolute path
636    /// failing that.
637    pub fn path(&self) -> Cow<'_, str> {
638        if let Ok(path) = self.data.uri.to_file_path() {
639            if let Some(path) = std::env::current_dir()
640                .ok()
641                .and_then(|cwd| path.strip_prefix(cwd).ok().and_then(Path::to_str))
642            {
643                return path.to_string().into();
644            }
645
646            if let Ok(path) = path.into_os_string().into_string() {
647                return path.into();
648            }
649        }
650
651        self.data.uri.as_str().into()
652    }
653
654    /// Gets the supported version of the document.
655    ///
656    /// Returns `None` if the document could not be parsed or contains an
657    /// unsupported version.
658    pub fn version(&self) -> Option<SupportedVersion> {
659        self.data.version
660    }
661
662    /// Gets the namespaces in the document.
663    pub fn namespaces(&self) -> impl Iterator<Item = (&str, &Namespace)> {
664        self.data.namespaces.iter().map(|(n, ns)| (n.as_str(), ns))
665    }
666
667    /// Gets a namespace in the document by name.
668    pub fn namespace(&self, name: &str) -> Option<&Namespace> {
669        self.data.namespaces.get(name)
670    }
671
672    /// Gets the tasks in the document.
673    pub fn tasks(&self) -> impl Iterator<Item = &Task> {
674        self.data.tasks.iter().map(|(_, t)| t)
675    }
676
677    /// Gets a task in the document by name.
678    pub fn task_by_name(&self, name: &str) -> Option<&Task> {
679        self.data.tasks.get(name)
680    }
681
682    /// Gets a workflow in the document.
683    ///
684    /// Returns `None` if the document did not contain a workflow.
685    pub fn workflow(&self) -> Option<&Workflow> {
686        self.data.workflow.as_ref()
687    }
688
689    /// Gets the structs in the document.
690    pub fn structs(&self) -> impl Iterator<Item = (&str, &Struct)> {
691        self.data.structs.iter().map(|(n, s)| (n.as_str(), s))
692    }
693
694    /// Gets a struct in the document by name.
695    pub fn struct_by_name(&self, name: &str) -> Option<&Struct> {
696        self.data.structs.get(name)
697    }
698
699    /// Gets the analysis diagnostics for the document.
700    pub fn diagnostics(&self) -> &[Diagnostic] {
701        &self.data.diagnostics
702    }
703
704    /// Sorts the diagnostics for the document.
705    ///
706    /// # Panics
707    ///
708    /// Panics if there is more than one reference to the document.
709    pub fn sort_diagnostics(&mut self) -> Self {
710        let data = &mut self.data;
711        let inner = Arc::get_mut(data).expect("should only have one reference");
712        inner.diagnostics.sort();
713        Self { data: data.clone() }
714    }
715
716    /// Extends the diagnostics for the document.
717    ///
718    /// # Panics
719    ///
720    /// Panics if there is more than one reference to the document.
721    pub fn extend_diagnostics(&mut self, diagnostics: Vec<Diagnostic>) -> Self {
722        let data = &mut self.data;
723        let inner = Arc::get_mut(data).expect("should only have one reference");
724        inner.diagnostics.extend(diagnostics);
725        Self { data: data.clone() }
726    }
727
728    /// Finds a scope based on a position within the document.
729    pub fn find_scope_by_position(&self, position: usize) -> Option<ScopeRef<'_>> {
730        /// Finds a scope within a collection of sorted scopes by position.
731        fn find_scope(scopes: &[Scope], position: usize) -> Option<ScopeRef<'_>> {
732            let mut index = match scopes.binary_search_by_key(&position, |s| s.span.start()) {
733                Ok(index) => index,
734                Err(index) => {
735                    // This indicates that we couldn't find a match and the match would go _before_
736                    // the first scope, so there is no containing scope.
737                    if index == 0 {
738                        return None;
739                    }
740
741                    index - 1
742                }
743            };
744
745            // We now have the index to start looking up the list of scopes
746            // We walk up the list to try to find a span that contains the position
747            loop {
748                let scope = &scopes[index];
749                if scope.span.contains(position) {
750                    return Some(ScopeRef::new(scopes, ScopeIndex(index)));
751                }
752
753                if index == 0 {
754                    return None;
755                }
756
757                index -= 1;
758            }
759        }
760
761        // Check to see if the position is contained in the workflow
762        if let Some(workflow) = &self.data.workflow {
763            if workflow.scope().span().contains(position) {
764                return find_scope(&workflow.scopes, position);
765            }
766        }
767
768        // Search for a task that might contain the position
769        let task = match self
770            .data
771            .tasks
772            .binary_search_by_key(&position, |_, t| t.scope().span().start())
773        {
774            Ok(index) => &self.data.tasks[index],
775            Err(index) => {
776                // This indicates that we couldn't find a match and the match would go _before_
777                // the first task, so there is no containing task.
778                if index == 0 {
779                    return None;
780                }
781
782                &self.data.tasks[index - 1]
783            }
784        };
785
786        if task.scope().span().contains(position) {
787            return find_scope(&task.scopes, position);
788        }
789
790        None
791    }
792
793    /// Determines if the document, or any documents transitively imported by
794    /// this document, has errors.
795    ///
796    /// Returns `true` if the document, or one of its transitive imports, has at
797    /// least one error diagnostic.
798    ///
799    /// Returns `false` if the document, and all of its transitive imports, have
800    /// no error diagnostics.
801    pub fn has_errors(&self) -> bool {
802        // Check this document for errors
803        if self
804            .diagnostics()
805            .iter()
806            .any(|d| d.severity() == Severity::Error)
807        {
808            return true;
809        }
810
811        // Check every imported document for errors
812        for (_, ns) in self.namespaces() {
813            if ns.document.has_errors() {
814                return true;
815            }
816        }
817
818        false
819    }
820
821    /// Visits the document with a pre-order traversal using the provided
822    /// visitor to visit each element in the document.
823    pub fn visit<V: crate::Visitor>(&self, diagnostics: &mut crate::Diagnostics, visitor: &mut V) {
824        crate::visit(self, diagnostics, visitor)
825    }
826}