wdl_analysis/
document.rs

1//! Representation of analyzed WDL documents.
2
3use std::borrow::Cow;
4use std::collections::HashMap;
5use std::path::Path;
6use std::str::FromStr;
7use std::sync::Arc;
8
9use indexmap::IndexMap;
10use petgraph::graph::NodeIndex;
11use rowan::GreenNode;
12use url::Url;
13use uuid::Uuid;
14use wdl_ast::Ast;
15use wdl_ast::AstNode;
16use wdl_ast::AstToken;
17use wdl_ast::Diagnostic;
18use wdl_ast::Severity;
19use wdl_ast::Span;
20use wdl_ast::SupportedVersion;
21use wdl_ast::SyntaxNode;
22
23use crate::DiagnosticsConfig;
24use crate::diagnostics::unused_import;
25use crate::graph::DocumentGraph;
26use crate::graph::ParseState;
27use crate::types::CallType;
28use crate::types::Type;
29
30mod v1;
31
/// Name of the implicit `task` variable that WDL 1.2 makes available inside
/// task command sections and task outputs.
pub const TASK_VAR_NAME: &str = "task";
35
36/// Represents a namespace introduced by an import.
37#[derive(Debug)]
38pub struct Namespace {
39    /// The span of the import that introduced the namespace.
40    span: Span,
41    /// The URI of the imported document that introduced the namespace.
42    source: Arc<Url>,
43    /// The namespace's document.
44    document: Document,
45    /// Whether or not the namespace is used (i.e. referenced) in the document.
46    used: bool,
47    /// Whether or not the namespace is excepted from the "unused import"
48    /// diagnostic.
49    excepted: bool,
50}
51
52impl Namespace {
53    /// Gets the span of the import that introduced the namespace.
54    pub fn span(&self) -> Span {
55        self.span
56    }
57
58    /// Gets the URI of the imported document that introduced the namespace.
59    pub fn source(&self) -> &Arc<Url> {
60        &self.source
61    }
62
63    /// Gets the imported document.
64    pub fn document(&self) -> &Document {
65        &self.document
66    }
67}
68
69/// Represents a struct in a document.
70#[derive(Debug, Clone)]
71pub struct Struct {
72    /// The span that introduced the struct.
73    ///
74    /// This is either the name of a struct definition (local) or an import's
75    /// URI or alias (imported).
76    span: Span,
77    /// The offset of the CST node from the start of the document.
78    ///
79    /// This is used to adjust diagnostics resulting from traversing the struct
80    /// node as if it were the root of the CST.
81    offset: usize,
82    /// Stores the CST node of the struct.
83    ///
84    /// This is used to calculate type equivalence for imports.
85    node: rowan::GreenNode,
86    /// The namespace that defines the struct.
87    ///
88    /// This is `Some` only for imported structs.
89    namespace: Option<String>,
90    /// The type of the struct.
91    ///
92    /// Initially this is `None` until a type check occurs.
93    ty: Option<Type>,
94}
95
96impl Struct {
97    /// Gets the namespace that defines this struct.
98    ///
99    /// Returns `None` for structs defined in the containing document or `Some`
100    /// for a struct introduced by an import.
101    pub fn namespace(&self) -> Option<&str> {
102        self.namespace.as_deref()
103    }
104
105    /// Gets the type of the struct.
106    ///
107    /// A value of `None` indicates that the type could not be determined for
108    /// the struct; this may happen if the struct definition is recursive.
109    pub fn ty(&self) -> Option<&Type> {
110        self.ty.as_ref()
111    }
112}
113
114/// Represents information about a name in a scope.
115#[derive(Debug, Clone)]
116pub struct Name {
117    /// The span of the name.
118    span: Span,
119    /// The type of the name.
120    ty: Type,
121}
122
123impl Name {
124    /// Gets the span of the name.
125    pub fn span(&self) -> Span {
126        self.span
127    }
128
129    /// Gets the type of the name.
130    pub fn ty(&self) -> &Type {
131        &self.ty
132    }
133}
134
/// Position of a scope inside a collection of scopes.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct ScopeIndex(usize);
138
139/// Represents a scope in a WDL document.
140#[derive(Debug)]
141struct Scope {
142    /// The index of the parent scope.
143    ///
144    /// This is `None` for task and workflow scopes.
145    parent: Option<ScopeIndex>,
146    /// The span in the document where the names of the scope are visible.
147    span: Span,
148    /// The map of names in scope to their span and types.
149    names: IndexMap<String, Name>,
150}
151
152impl Scope {
153    /// Creates a new scope given the parent scope and span.
154    fn new(parent: Option<ScopeIndex>, span: Span) -> Self {
155        Self {
156            parent,
157            span,
158            names: Default::default(),
159        }
160    }
161
162    /// Inserts a name into the scope.
163    pub fn insert(&mut self, name: impl Into<String>, span: Span, ty: Type) {
164        self.names.insert(name.into(), Name { span, ty });
165    }
166}
167
168/// Represents a reference to a scope.
169#[derive(Debug, Clone, Copy)]
170pub struct ScopeRef<'a> {
171    /// The reference to the scopes collection.
172    scopes: &'a [Scope],
173    /// The index of the scope in the collection.
174    index: ScopeIndex,
175}
176
177impl<'a> ScopeRef<'a> {
178    /// Creates a new scope reference given the scope index.
179    fn new(scopes: &'a [Scope], index: ScopeIndex) -> Self {
180        Self { scopes, index }
181    }
182
183    /// Gets the span of the scope.
184    pub fn span(&self) -> Span {
185        self.scopes[self.index.0].span
186    }
187
188    /// Gets the parent scope.
189    ///
190    /// Returns `None` if there is no parent scope.
191    pub fn parent(&self) -> Option<Self> {
192        self.scopes[self.index.0].parent.map(|p| Self {
193            scopes: self.scopes,
194            index: p,
195        })
196    }
197
198    /// Gets all of the names available at this scope.
199    pub fn names(&self) -> impl Iterator<Item = (&str, &Name)> + use<'_> {
200        self.scopes[self.index.0]
201            .names
202            .iter()
203            .map(|(name, n)| (name.as_str(), n))
204    }
205
206    /// Gets a name local to this scope.
207    ///
208    /// Returns `None` if a name local to this scope was not found.
209    pub fn local(&self, name: &str) -> Option<&Name> {
210        self.scopes[self.index.0].names.get(name)
211    }
212
213    /// Lookups a name in the scope.
214    ///
215    /// Returns `None` if the name is not available in the scope.
216    pub fn lookup(&self, name: &str) -> Option<&Name> {
217        let mut current = Some(self.index);
218
219        while let Some(index) = current {
220            if let Some(name) = self.scopes[index.0].names.get(name) {
221                return Some(name);
222            }
223
224            current = self.scopes[index.0].parent;
225        }
226
227        None
228    }
229}
230
231/// Represents a mutable reference to a scope.
232#[derive(Debug)]
233struct ScopeRefMut<'a> {
234    /// The reference to all scopes.
235    scopes: &'a mut [Scope],
236    /// The index to the scope.
237    index: ScopeIndex,
238}
239
240impl<'a> ScopeRefMut<'a> {
241    /// Creates a new mutable scope reference given the scope index.
242    fn new(scopes: &'a mut [Scope], index: ScopeIndex) -> Self {
243        Self { scopes, index }
244    }
245
246    /// Lookups a name in the scope.
247    ///
248    /// Returns `None` if the name is not available in the scope.
249    pub fn lookup(&self, name: &str) -> Option<&Name> {
250        let mut current = Some(self.index);
251
252        while let Some(index) = current {
253            if let Some(name) = self.scopes[index.0].names.get(name) {
254                return Some(name);
255            }
256
257            current = self.scopes[index.0].parent;
258        }
259
260        None
261    }
262
263    /// Inserts a name into the scope.
264    pub fn insert(&mut self, name: impl Into<String>, span: Span, ty: Type) {
265        self.scopes[self.index.0]
266            .names
267            .insert(name.into(), Name { span, ty });
268    }
269
270    /// Converts the mutable scope reference to an immutable scope reference.
271    pub fn as_scope_ref(&'a self) -> ScopeRef<'a> {
272        ScopeRef {
273            scopes: self.scopes,
274            index: self.index,
275        }
276    }
277}
278
279/// Represents a task or workflow input.
280#[derive(Debug, Clone, PartialEq, Eq)]
281pub struct Input {
282    /// The type of the input.
283    ty: Type,
284    /// Whether or not the input is required.
285    required: bool,
286}
287
288impl Input {
289    /// Gets the type of the input.
290    pub fn ty(&self) -> &Type {
291        &self.ty
292    }
293
294    /// Whether or not the input is required.
295    pub fn required(&self) -> bool {
296        self.required
297    }
298}
299
300/// Represents a task or workflow output.
301#[derive(Debug, Clone, PartialEq, Eq)]
302pub struct Output {
303    /// The type of the output.
304    ty: Type,
305}
306
307impl Output {
308    /// Creates a new output with the given type.
309    pub(crate) fn new(ty: Type) -> Self {
310        Self { ty }
311    }
312
313    /// Gets the type of the output.
314    pub fn ty(&self) -> &Type {
315        &self.ty
316    }
317}
318
319/// Represents a task in a document.
320#[derive(Debug)]
321pub struct Task {
322    /// The span of the task name.
323    name_span: Span,
324    /// The name of the task.
325    name: String,
326    /// The scopes contained in the task.
327    ///
328    /// The first scope will always be the task's scope.
329    ///
330    /// The scopes will be in sorted order by span start.
331    scopes: Vec<Scope>,
332    /// The inputs of the task.
333    inputs: Arc<IndexMap<String, Input>>,
334    /// The outputs of the task.
335    outputs: Arc<IndexMap<String, Output>>,
336}
337
338impl Task {
339    /// Gets the name of the task.
340    pub fn name(&self) -> &str {
341        &self.name
342    }
343
344    /// Gets the span of the name.
345    pub fn name_span(&self) -> Span {
346        self.name_span
347    }
348
349    /// Gets the scope of the task.
350    pub fn scope(&self) -> ScopeRef<'_> {
351        ScopeRef::new(&self.scopes, ScopeIndex(0))
352    }
353
354    /// Gets the inputs of the task.
355    pub fn inputs(&self) -> &IndexMap<String, Input> {
356        &self.inputs
357    }
358
359    /// Gets the outputs of the task.
360    pub fn outputs(&self) -> &IndexMap<String, Output> {
361        &self.outputs
362    }
363}
364
365/// Represents a workflow in a document.
366#[derive(Debug)]
367pub struct Workflow {
368    /// The span of the workflow name.
369    name_span: Span,
370    /// The name of the workflow.
371    name: String,
372    /// The scopes contained in the workflow.
373    ///
374    /// The first scope will always be the workflow's scope.
375    ///
376    /// The scopes will be in sorted order by span start.
377    scopes: Vec<Scope>,
378    /// The inputs of the workflow.
379    inputs: Arc<IndexMap<String, Input>>,
380    /// The outputs of the workflow.
381    outputs: Arc<IndexMap<String, Output>>,
382    /// The calls made by the workflow.
383    calls: HashMap<String, CallType>,
384    /// Whether or not nested inputs are allowed for the workflow.
385    allows_nested_inputs: bool,
386}
387
388impl Workflow {
389    /// Gets the name of the workflow.
390    pub fn name(&self) -> &str {
391        &self.name
392    }
393
394    /// Gets the span of the name.
395    pub fn name_span(&self) -> Span {
396        self.name_span
397    }
398
399    /// Gets the scope of the workflow.
400    pub fn scope(&self) -> ScopeRef<'_> {
401        ScopeRef::new(&self.scopes, ScopeIndex(0))
402    }
403
404    /// Gets the inputs of the workflow.
405    pub fn inputs(&self) -> &IndexMap<String, Input> {
406        &self.inputs
407    }
408
409    /// Gets the outputs of the workflow.
410    pub fn outputs(&self) -> &IndexMap<String, Output> {
411        &self.outputs
412    }
413
414    /// Gets the calls made by the workflow.
415    pub fn calls(&self) -> &HashMap<String, CallType> {
416        &self.calls
417    }
418
419    /// Determines if the workflow allows nested inputs.
420    pub fn allows_nested_inputs(&self) -> bool {
421        self.allows_nested_inputs
422    }
423}
424
425/// Represents analysis data about a WDL document.
426#[derive(Debug)]
427struct DocumentData {
428    /// The root CST node of the document.
429    ///
430    /// This is `None` when the document could not be parsed.
431    root: Option<GreenNode>,
432    /// The document identifier.
433    ///
434    /// The identifier changes every time the document is analyzed.
435    id: Arc<String>,
436    /// The URI of the analyzed document.
437    uri: Arc<Url>,
438    /// The version of the document.
439    version: Option<SupportedVersion>,
440    /// The namespaces in the document.
441    namespaces: IndexMap<String, Namespace>,
442    /// The tasks in the document.
443    tasks: IndexMap<String, Task>,
444    /// The singular workflow in the document.
445    workflow: Option<Workflow>,
446    /// The structs in the document.
447    structs: IndexMap<String, Struct>,
448    /// The diagnostics for the document.
449    diagnostics: Vec<Diagnostic>,
450}
451
452impl DocumentData {
453    /// Constructs a new analysis document data.
454    fn new(
455        uri: Arc<Url>,
456        root: Option<GreenNode>,
457        version: Option<SupportedVersion>,
458        diagnostics: Vec<Diagnostic>,
459    ) -> Self {
460        Self {
461            root,
462            id: Uuid::new_v4().to_string().into(),
463            uri,
464            version,
465            namespaces: Default::default(),
466            tasks: Default::default(),
467            workflow: Default::default(),
468            structs: Default::default(),
469            diagnostics,
470        }
471    }
472}
473
474/// Represents an analyzed WDL document.
475///
476/// This type is cheaply cloned.
477#[derive(Debug, Clone)]
478pub struct Document {
479    /// The document data for the document.
480    data: Arc<DocumentData>,
481}
482
483impl Document {
484    /// Creates a new default document from a URI.
485    pub(crate) fn default_from_uri(uri: Arc<Url>) -> Self {
486        Self {
487            data: Arc::new(DocumentData::new(uri, None, None, Default::default())),
488        }
489    }
490
491    /// Creates a new analyzed document from a document graph node.
492    pub(crate) fn from_graph_node(
493        config: DiagnosticsConfig,
494        graph: &DocumentGraph,
495        index: NodeIndex,
496    ) -> Self {
497        let node = graph.get(index);
498
499        let diagnostics = match node.parse_state() {
500            ParseState::NotParsed => panic!("node should have been parsed"),
501            ParseState::Error(_) => return Self::default_from_uri(node.uri().clone()),
502            ParseState::Parsed { diagnostics, .. } => diagnostics,
503        };
504
505        let root = node.root().expect("node should have been parsed");
506        let (version, config) = match root.version_statement() {
507            Some(stmt) => (stmt.version(), config.excepted_for_node(stmt.inner())),
508            None => {
509                // Don't process a document with a missing version
510                return Self {
511                    data: Arc::new(DocumentData::new(
512                        node.uri().clone(),
513                        Some(root.inner().green().into()),
514                        None,
515                        diagnostics.to_vec(),
516                    )),
517                };
518            }
519        };
520
521        let mut data = DocumentData::new(
522            node.uri().clone(),
523            Some(root.inner().green().into()),
524            SupportedVersion::from_str(version.text()).ok(),
525            diagnostics.to_vec(),
526        );
527        match root.ast() {
528            Ast::Unsupported => {}
529            Ast::V1(ast) => v1::populate_document(&mut data, config, graph, index, &ast, &version),
530        }
531
532        // Check for unused imports
533        if let Some(severity) = config.unused_import {
534            let DocumentData {
535                namespaces,
536                diagnostics,
537                ..
538            } = &mut data;
539
540            diagnostics.extend(
541                namespaces
542                    .iter()
543                    .filter(|(_, ns)| !ns.used && !ns.excepted)
544                    .map(|(name, ns)| unused_import(name, ns.span()).with_severity(severity)),
545            );
546        }
547
548        Self {
549            data: Arc::new(data),
550        }
551    }
552
553    /// Gets the root AST document node.
554    ///
555    /// # Panics
556    ///
557    /// Panics if the document was not parsed.
558    pub fn root(&self) -> wdl_ast::Document {
559        wdl_ast::Document::cast(SyntaxNode::new_root(
560            self.data.root.clone().expect("should have a root"),
561        ))
562        .expect("should cast")
563    }
564
565    /// Gets the identifier of the document.
566    ///
567    /// This value changes when a document is reanalyzed.
568    pub fn id(&self) -> &Arc<String> {
569        &self.data.id
570    }
571
572    /// Gets the URI of the document.
573    pub fn uri(&self) -> &Arc<Url> {
574        &self.data.uri
575    }
576
577    /// Gets the path to the document.
578    ///
579    /// If the scheme of the document's URI is not `file`, this will return the
580    /// URI as a string. Otherwise, this will attempt to return the path
581    /// relative to the current working directory, or the absolute path
582    /// failing that.
583    pub fn path(&self) -> Cow<'_, str> {
584        if let Ok(path) = self.data.uri.to_file_path() {
585            if let Some(path) = std::env::current_dir()
586                .ok()
587                .and_then(|cwd| path.strip_prefix(cwd).ok().and_then(Path::to_str))
588            {
589                return path.to_string().into();
590            }
591
592            if let Ok(path) = path.into_os_string().into_string() {
593                return path.into();
594            }
595        }
596
597        self.data.uri.as_str().into()
598    }
599
600    /// Gets the supported version of the document.
601    ///
602    /// Returns `None` if the document could not be parsed or contains an
603    /// unsupported version.
604    pub fn version(&self) -> Option<SupportedVersion> {
605        self.data.version
606    }
607
608    /// Gets the namespaces in the document.
609    pub fn namespaces(&self) -> impl Iterator<Item = (&str, &Namespace)> {
610        self.data.namespaces.iter().map(|(n, ns)| (n.as_str(), ns))
611    }
612
613    /// Gets a namespace in the document by name.
614    pub fn namespace(&self, name: &str) -> Option<&Namespace> {
615        self.data.namespaces.get(name)
616    }
617
618    /// Gets the tasks in the document.
619    pub fn tasks(&self) -> impl Iterator<Item = &Task> {
620        self.data.tasks.iter().map(|(_, t)| t)
621    }
622
623    /// Gets a task in the document by name.
624    pub fn task_by_name(&self, name: &str) -> Option<&Task> {
625        self.data.tasks.get(name)
626    }
627
628    /// Gets a workflow in the document.
629    ///
630    /// Returns `None` if the document did not contain a workflow.
631    pub fn workflow(&self) -> Option<&Workflow> {
632        self.data.workflow.as_ref()
633    }
634
635    /// Gets the structs in the document.
636    pub fn structs(&self) -> impl Iterator<Item = (&str, &Struct)> {
637        self.data.structs.iter().map(|(n, s)| (n.as_str(), s))
638    }
639
640    /// Gets a struct in the document by name.
641    pub fn struct_by_name(&self, name: &str) -> Option<&Struct> {
642        self.data.structs.get(name)
643    }
644
645    /// Gets the analysis diagnostics for the document.
646    pub fn diagnostics(&self) -> &[Diagnostic] {
647        &self.data.diagnostics
648    }
649
650    /// Sorts the diagnostics for the document.
651    ///
652    /// # Panics
653    ///
654    /// Panics if there is more than one reference to the document.
655    pub fn sort_diagnostics(&mut self) -> Self {
656        let data = &mut self.data;
657        let inner = Arc::get_mut(data).expect("should only have one reference");
658        inner.diagnostics.sort();
659        Self { data: data.clone() }
660    }
661
662    /// Extends the diagnostics for the document.
663    ///
664    /// # Panics
665    ///
666    /// Panics if there is more than one reference to the document.
667    pub fn extend_diagnostics(&mut self, diagnostics: Vec<Diagnostic>) -> Self {
668        let data = &mut self.data;
669        let inner = Arc::get_mut(data).expect("should only have one reference");
670        inner.diagnostics.extend(diagnostics);
671        Self { data: data.clone() }
672    }
673
674    /// Finds a scope based on a position within the document.
675    pub fn find_scope_by_position(&self, position: usize) -> Option<ScopeRef<'_>> {
676        /// Finds a scope within a collection of sorted scopes by position.
677        fn find_scope(scopes: &[Scope], position: usize) -> Option<ScopeRef<'_>> {
678            let mut index = match scopes.binary_search_by_key(&position, |s| s.span.start()) {
679                Ok(index) => index,
680                Err(index) => {
681                    // This indicates that we couldn't find a match and the match would go _before_
682                    // the first scope, so there is no containing scope.
683                    if index == 0 {
684                        return None;
685                    }
686
687                    index - 1
688                }
689            };
690
691            // We now have the index to start looking up the list of scopes
692            // We walk up the list to try to find a span that contains the position
693            loop {
694                let scope = &scopes[index];
695                if scope.span.contains(position) {
696                    return Some(ScopeRef::new(scopes, ScopeIndex(index)));
697                }
698
699                if index == 0 {
700                    return None;
701                }
702
703                index -= 1;
704            }
705        }
706
707        // Check to see if the position is contained in the workflow
708        if let Some(workflow) = &self.data.workflow {
709            if workflow.scope().span().contains(position) {
710                return find_scope(&workflow.scopes, position);
711            }
712        }
713
714        // Search for a task that might contain the position
715        let task = match self
716            .data
717            .tasks
718            .binary_search_by_key(&position, |_, t| t.scope().span().start())
719        {
720            Ok(index) => &self.data.tasks[index],
721            Err(index) => {
722                // This indicates that we couldn't find a match and the match would go _before_
723                // the first task, so there is no containing task.
724                if index == 0 {
725                    return None;
726                }
727
728                &self.data.tasks[index - 1]
729            }
730        };
731
732        if task.scope().span().contains(position) {
733            return find_scope(&task.scopes, position);
734        }
735
736        None
737    }
738
739    /// Determines if the document, or any documents transitively imported by
740    /// this document, has errors.
741    ///
742    /// Returns `true` if the document, or one of its transitive imports, has at
743    /// least one error diagnostic.
744    ///
745    /// Returns `false` if the document, and all of its transitive imports, have
746    /// no error diagnostics.
747    pub fn has_errors(&self) -> bool {
748        // Check this document for errors
749        if self
750            .diagnostics()
751            .iter()
752            .any(|d| d.severity() == Severity::Error)
753        {
754            return true;
755        }
756
757        // Check every imported document for errors
758        for (_, ns) in self.namespaces() {
759            if ns.document.has_errors() {
760                return true;
761            }
762        }
763
764        false
765    }
766
767    /// Visits the document with a pre-order traversal using the provided
768    /// visitor to visit each element in the document.
769    pub fn visit<V: crate::Visitor>(&self, diagnostics: &mut crate::Diagnostics, visitor: &mut V) {
770        crate::visit(self, diagnostics, visitor)
771    }
772}