wdl_analysis/
document.rs

1//! Representation of analyzed WDL documents.
2
3use std::borrow::Cow;
4use std::cmp::Ordering;
5use std::collections::HashMap;
6use std::path::Path;
7use std::str::FromStr;
8use std::sync::Arc;
9
10use indexmap::IndexMap;
11use petgraph::graph::NodeIndex;
12use rowan::GreenNode;
13use url::Url;
14use uuid::Uuid;
15use wdl_ast::Ast;
16use wdl_ast::AstNode;
17use wdl_ast::AstToken;
18use wdl_ast::Diagnostic;
19use wdl_ast::Span;
20use wdl_ast::SupportedVersion;
21use wdl_ast::SyntaxNode;
22
23use crate::DiagnosticsConfig;
24use crate::diagnostics::unused_import;
25use crate::graph::DocumentGraph;
26use crate::graph::ParseState;
27use crate::types::CallType;
28use crate::types::Type;
29
30mod v1;
31
/// The name of the implicit `task` variable.
///
/// In WDL 1.2 this variable is available in task command sections and outputs.
pub const TASK_VAR_NAME: &str = "task";
35
36/// Represents a namespace introduced by an import.
37#[derive(Debug)]
38pub struct Namespace {
39    /// The span of the import that introduced the namespace.
40    span: Span,
41    /// The URI of the imported document that introduced the namespace.
42    source: Arc<Url>,
43    /// The namespace's document.
44    document: Document,
45    /// Whether or not the namespace is used (i.e. referenced) in the document.
46    used: bool,
47    /// Whether or not the namespace is excepted from the "unused import"
48    /// diagnostic.
49    excepted: bool,
50}
51
52impl Namespace {
53    /// Gets the span of the import that introduced the namespace.
54    pub fn span(&self) -> Span {
55        self.span
56    }
57
58    /// Gets the URI of the imported document that introduced the namespace.
59    pub fn source(&self) -> &Arc<Url> {
60        &self.source
61    }
62
63    /// Gets the imported document.
64    pub fn document(&self) -> &Document {
65        &self.document
66    }
67}
68
69/// Represents a struct in a document.
70#[derive(Debug, Clone)]
71pub struct Struct {
72    /// The span that introduced the struct.
73    ///
74    /// This is either the name of a struct definition (local) or an import's
75    /// URI or alias (imported).
76    span: Span,
77    /// The offset of the CST node from the start of the document.
78    ///
79    /// This is used to adjust diagnostics resulting from traversing the struct
80    /// node as if it were the root of the CST.
81    offset: usize,
82    /// Stores the CST node of the struct.
83    ///
84    /// This is used to calculate type equivalence for imports.
85    node: rowan::GreenNode,
86    /// The namespace that defines the struct.
87    ///
88    /// This is `Some` only for imported structs.
89    namespace: Option<String>,
90    /// The type of the struct.
91    ///
92    /// Initially this is `None` until a type check occurs.
93    ty: Option<Type>,
94}
95
96impl Struct {
97    /// Gets the namespace that defines this struct.
98    ///
99    /// Returns `None` for structs defined in the containing document or `Some`
100    /// for a struct introduced by an import.
101    pub fn namespace(&self) -> Option<&str> {
102        self.namespace.as_deref()
103    }
104
105    /// Gets the type of the struct.
106    ///
107    /// A value of `None` indicates that the type could not be determined for
108    /// the struct; this may happen if the struct definition is recursive.
109    pub fn ty(&self) -> Option<&Type> {
110        self.ty.as_ref()
111    }
112}
113
114/// Represents information about a name in a scope.
115#[derive(Debug, Clone)]
116pub struct Name {
117    /// The span of the name.
118    span: Span,
119    /// The type of the name.
120    ty: Type,
121}
122
123impl Name {
124    /// Gets the span of the name.
125    pub fn span(&self) -> Span {
126        self.span
127    }
128
129    /// Gets the type of the name.
130    pub fn ty(&self) -> &Type {
131        &self.ty
132    }
133}
134
/// The position of a scope within a collection of scopes.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
struct ScopeIndex(usize);
138
139/// Represents a scope in a WDL document.
140#[derive(Debug)]
141struct Scope {
142    /// The index of the parent scope.
143    ///
144    /// This is `None` for task and workflow scopes.
145    parent: Option<ScopeIndex>,
146    /// The span in the document where the names of the scope are visible.
147    span: Span,
148    /// The map of names in scope to their span and types.
149    names: IndexMap<String, Name>,
150}
151
152impl Scope {
153    /// Creates a new scope given the parent scope and span.
154    fn new(parent: Option<ScopeIndex>, span: Span) -> Self {
155        Self {
156            parent,
157            span,
158            names: Default::default(),
159        }
160    }
161
162    /// Inserts a name into the scope.
163    pub fn insert(&mut self, name: impl Into<String>, span: Span, ty: Type) {
164        self.names.insert(name.into(), Name { span, ty });
165    }
166}
167
168/// Represents a reference to a scope.
169#[derive(Debug, Clone, Copy)]
170pub struct ScopeRef<'a> {
171    /// The reference to the scopes collection.
172    scopes: &'a [Scope],
173    /// The index of the scope in the collection.
174    index: ScopeIndex,
175}
176
177impl<'a> ScopeRef<'a> {
178    /// Creates a new scope reference given the scope index.
179    fn new(scopes: &'a [Scope], index: ScopeIndex) -> Self {
180        Self { scopes, index }
181    }
182
183    /// Gets the span of the scope.
184    pub fn span(&self) -> Span {
185        self.scopes[self.index.0].span
186    }
187
188    /// Gets the parent scope.
189    ///
190    /// Returns `None` if there is no parent scope.
191    pub fn parent(&self) -> Option<Self> {
192        self.scopes[self.index.0].parent.map(|p| Self {
193            scopes: self.scopes,
194            index: p,
195        })
196    }
197
198    /// Gets all of the names available at this scope.
199    pub fn names(&self) -> impl Iterator<Item = (&str, &Name)> + use<'_> {
200        self.scopes[self.index.0]
201            .names
202            .iter()
203            .map(|(name, n)| (name.as_str(), n))
204    }
205
206    /// Gets a name local to this scope.
207    ///
208    /// Returns `None` if a name local to this scope was not found.
209    pub fn local(&self, name: &str) -> Option<&Name> {
210        self.scopes[self.index.0].names.get(name)
211    }
212
213    /// Lookups a name in the scope.
214    ///
215    /// Returns `None` if the name is not available in the scope.
216    pub fn lookup(&self, name: &str) -> Option<&Name> {
217        let mut current = Some(self.index);
218
219        while let Some(index) = current {
220            if let Some(name) = self.scopes[index.0].names.get(name) {
221                return Some(name);
222            }
223
224            current = self.scopes[index.0].parent;
225        }
226
227        None
228    }
229}
230
231/// Represents a mutable reference to a scope.
232#[derive(Debug)]
233struct ScopeRefMut<'a> {
234    /// The reference to all scopes.
235    scopes: &'a mut [Scope],
236    /// The index to the scope.
237    index: ScopeIndex,
238}
239
240impl<'a> ScopeRefMut<'a> {
241    /// Creates a new mutable scope reference given the scope index.
242    fn new(scopes: &'a mut [Scope], index: ScopeIndex) -> Self {
243        Self { scopes, index }
244    }
245
246    /// Lookups a name in the scope.
247    ///
248    /// Returns `None` if the name is not available in the scope.
249    pub fn lookup(&self, name: &str) -> Option<&Name> {
250        let mut current = Some(self.index);
251
252        while let Some(index) = current {
253            if let Some(name) = self.scopes[index.0].names.get(name) {
254                return Some(name);
255            }
256
257            current = self.scopes[index.0].parent;
258        }
259
260        None
261    }
262
263    /// Inserts a name into the scope.
264    pub fn insert(&mut self, name: impl Into<String>, span: Span, ty: Type) {
265        self.scopes[self.index.0]
266            .names
267            .insert(name.into(), Name { span, ty });
268    }
269
270    /// Converts the mutable scope reference to an immutable scope reference.
271    pub fn as_scope_ref(&'a self) -> ScopeRef<'a> {
272        ScopeRef {
273            scopes: self.scopes,
274            index: self.index,
275        }
276    }
277}
278
279/// Represents a task or workflow input.
280#[derive(Debug, Clone, PartialEq, Eq)]
281pub struct Input {
282    /// The type of the input.
283    ty: Type,
284    /// Whether or not the input is required.
285    required: bool,
286}
287
288impl Input {
289    /// Gets the type of the input.
290    pub fn ty(&self) -> &Type {
291        &self.ty
292    }
293
294    /// Whether or not the input is required.
295    pub fn required(&self) -> bool {
296        self.required
297    }
298}
299
300/// Represents a task or workflow output.
301#[derive(Debug, Clone, PartialEq, Eq)]
302pub struct Output {
303    /// The type of the output.
304    ty: Type,
305}
306
307impl Output {
308    /// Creates a new output with the given type.
309    pub(crate) fn new(ty: Type) -> Self {
310        Self { ty }
311    }
312
313    /// Gets the type of the output.
314    pub fn ty(&self) -> &Type {
315        &self.ty
316    }
317}
318
319/// Represents a task in a document.
320#[derive(Debug)]
321pub struct Task {
322    /// The span of the task name.
323    name_span: Span,
324    /// The name of the task.
325    name: String,
326    /// The scopes contained in the task.
327    ///
328    /// The first scope will always be the task's scope.
329    ///
330    /// The scopes will be in sorted order by span start.
331    scopes: Vec<Scope>,
332    /// The inputs of the task.
333    inputs: Arc<IndexMap<String, Input>>,
334    /// The outputs of the task.
335    outputs: Arc<IndexMap<String, Output>>,
336}
337
338impl Task {
339    /// Gets the name of the task.
340    pub fn name(&self) -> &str {
341        &self.name
342    }
343
344    /// Gets the scope of the task.
345    pub fn scope(&self) -> ScopeRef<'_> {
346        ScopeRef::new(&self.scopes, ScopeIndex(0))
347    }
348
349    /// Gets the inputs of the task.
350    pub fn inputs(&self) -> &IndexMap<String, Input> {
351        &self.inputs
352    }
353
354    /// Gets the outputs of the task.
355    pub fn outputs(&self) -> &IndexMap<String, Output> {
356        &self.outputs
357    }
358}
359
360/// Represents a workflow in a document.
361#[derive(Debug)]
362pub struct Workflow {
363    /// The span of the workflow name.
364    name_span: Span,
365    /// The name of the workflow.
366    name: String,
367    /// The scopes contained in the workflow.
368    ///
369    /// The first scope will always be the workflow's scope.
370    ///
371    /// The scopes will be in sorted order by span start.
372    scopes: Vec<Scope>,
373    /// The inputs of the workflow.
374    inputs: Arc<IndexMap<String, Input>>,
375    /// The outputs of the workflow.
376    outputs: Arc<IndexMap<String, Output>>,
377    /// The calls made by the workflow.
378    calls: HashMap<String, CallType>,
379    /// Whether or not nested inputs are allowed for the workflow.
380    allows_nested_inputs: bool,
381}
382
383impl Workflow {
384    /// Gets the name of the workflow.
385    pub fn name(&self) -> &str {
386        &self.name
387    }
388
389    /// Gets the scope of the workflow.
390    pub fn scope(&self) -> ScopeRef<'_> {
391        ScopeRef::new(&self.scopes, ScopeIndex(0))
392    }
393
394    /// Gets the inputs of the workflow.
395    pub fn inputs(&self) -> &IndexMap<String, Input> {
396        &self.inputs
397    }
398
399    /// Gets the outputs of the workflow.
400    pub fn outputs(&self) -> &IndexMap<String, Output> {
401        &self.outputs
402    }
403
404    /// Gets the calls made by the workflow.
405    pub fn calls(&self) -> &HashMap<String, CallType> {
406        &self.calls
407    }
408
409    /// Determines if the workflow allows nested inputs.
410    pub fn allows_nested_inputs(&self) -> bool {
411        self.allows_nested_inputs
412    }
413}
414
415/// Represents analysis data about a WDL document.
416#[derive(Debug)]
417struct DocumentData {
418    /// The root CST node of the document.
419    ///
420    /// This is `None` when the document could not be parsed.
421    root: Option<GreenNode>,
422    /// The document identifier.
423    ///
424    /// The identifier changes every time the document is analyzed.
425    id: Arc<String>,
426    /// The URI of the analyzed document.
427    uri: Arc<Url>,
428    /// The version of the document.
429    version: Option<SupportedVersion>,
430    /// The namespaces in the document.
431    namespaces: IndexMap<String, Namespace>,
432    /// The tasks in the document.
433    tasks: IndexMap<String, Task>,
434    /// The singular workflow in the document.
435    workflow: Option<Workflow>,
436    /// The structs in the document.
437    structs: IndexMap<String, Struct>,
438    /// The diagnostics for the document.
439    diagnostics: Vec<Diagnostic>,
440}
441
442impl DocumentData {
443    /// Constructs a new analysis document data.
444    fn new(
445        uri: Arc<Url>,
446        root: Option<GreenNode>,
447        version: Option<SupportedVersion>,
448        diagnostics: Vec<Diagnostic>,
449    ) -> Self {
450        Self {
451            root,
452            id: Uuid::new_v4().to_string().into(),
453            uri,
454            version,
455            namespaces: Default::default(),
456            tasks: Default::default(),
457            workflow: Default::default(),
458            structs: Default::default(),
459            diagnostics,
460        }
461    }
462}
463
464/// Represents an analyzed WDL document.
465///
466/// This type is cheaply cloned.
467#[derive(Debug, Clone)]
468pub struct Document {
469    /// The document data for the document.
470    data: Arc<DocumentData>,
471}
472
473impl Document {
474    /// Creates a new analyzed document from a document graph node.
475    pub(crate) fn from_graph_node(
476        config: DiagnosticsConfig,
477        graph: &DocumentGraph,
478        index: NodeIndex,
479    ) -> Self {
480        let node = graph.get(index);
481
482        let diagnostics = match node.parse_state() {
483            ParseState::NotParsed => panic!("node should have been parsed"),
484            ParseState::Error(_) => {
485                return Self {
486                    data: Arc::new(DocumentData::new(
487                        node.uri().clone(),
488                        None,
489                        None,
490                        Default::default(),
491                    )),
492                };
493            }
494            ParseState::Parsed { diagnostics, .. } => {
495                Vec::from_iter(diagnostics.as_ref().iter().cloned())
496            }
497        };
498
499        let root = node.root().expect("node should have been parsed");
500        let (version, config) = match root.version_statement() {
501            Some(stmt) => (stmt.version(), config.excepted_for_node(stmt.inner())),
502            None => {
503                // Don't process a document with a missing version
504                return Self {
505                    data: Arc::new(DocumentData::new(
506                        node.uri().clone(),
507                        Some(root.inner().green().into()),
508                        None,
509                        diagnostics,
510                    )),
511                };
512            }
513        };
514
515        let mut data = DocumentData::new(
516            node.uri().clone(),
517            Some(root.inner().green().into()),
518            SupportedVersion::from_str(version.text()).ok(),
519            diagnostics,
520        );
521        match root.ast() {
522            Ast::Unsupported => {}
523            Ast::V1(ast) => v1::populate_document(&mut data, config, graph, index, &ast, &version),
524        }
525
526        // Check for unused imports
527        if let Some(severity) = config.unused_import {
528            let DocumentData {
529                namespaces,
530                diagnostics,
531                ..
532            } = &mut data;
533
534            diagnostics.extend(
535                namespaces
536                    .iter()
537                    .filter(|(_, ns)| !ns.used && !ns.excepted)
538                    .map(|(name, ns)| unused_import(name, ns.span()).with_severity(severity)),
539            );
540        }
541
542        // Sort the diagnostics by start
543        data.diagnostics
544            .sort_by(|a, b| match (a.labels().next(), b.labels().next()) {
545                (None, None) => Ordering::Equal,
546                (None, Some(_)) => Ordering::Less,
547                (Some(_), None) => Ordering::Greater,
548                (Some(a), Some(b)) => a.span().start().cmp(&b.span().start()),
549            });
550
551        Self {
552            data: Arc::new(data),
553        }
554    }
555
556    /// Gets the root AST document node.
557    ///
558    /// # Panics
559    ///
560    /// Panics if the document was not parsed.
561    pub fn root(&self) -> wdl_ast::Document {
562        wdl_ast::Document::cast(SyntaxNode::new_root(
563            self.data.root.clone().expect("should have a root"),
564        ))
565        .expect("should cast")
566    }
567
568    /// Gets the identifier of the document.
569    ///
570    /// This value changes when a document is reanalyzed.
571    pub fn id(&self) -> &Arc<String> {
572        &self.data.id
573    }
574
575    /// Gets the URI of the document.
576    pub fn uri(&self) -> &Arc<Url> {
577        &self.data.uri
578    }
579
580    /// Gets the path to the document.
581    ///
582    /// If the scheme of the document's URI is not `file`, this will return the
583    /// URI as a string. Otherwise, this will attempt to return the path
584    /// relative to the current working directory, or the absolute path
585    /// failing that.
586    pub fn path(&self) -> Cow<'_, str> {
587        if let Ok(path) = self.data.uri.to_file_path() {
588            if let Some(path) = std::env::current_dir()
589                .ok()
590                .and_then(|cwd| path.strip_prefix(cwd).ok().and_then(Path::to_str))
591            {
592                return path.to_string().into();
593            }
594
595            if let Ok(path) = path.into_os_string().into_string() {
596                return path.into();
597            }
598        }
599
600        self.data.uri.as_str().into()
601    }
602
603    /// Gets the supported version of the document.
604    ///
605    /// Returns `None` if the document could not be parsed or contains an
606    /// unsupported version.
607    pub fn version(&self) -> Option<SupportedVersion> {
608        self.data.version
609    }
610
611    /// Gets the namespaces in the document.
612    pub fn namespaces(&self) -> impl Iterator<Item = (&str, &Namespace)> {
613        self.data.namespaces.iter().map(|(n, ns)| (n.as_str(), ns))
614    }
615
616    /// Gets a namespace in the document by name.
617    pub fn namespace(&self, name: &str) -> Option<&Namespace> {
618        self.data.namespaces.get(name)
619    }
620
621    /// Gets the tasks in the document.
622    pub fn tasks(&self) -> impl Iterator<Item = &Task> {
623        self.data.tasks.iter().map(|(_, t)| t)
624    }
625
626    /// Gets a task in the document by name.
627    pub fn task_by_name(&self, name: &str) -> Option<&Task> {
628        self.data.tasks.get(name)
629    }
630
631    /// Gets a workflow in the document.
632    ///
633    /// Returns `None` if the document did not contain a workflow.
634    pub fn workflow(&self) -> Option<&Workflow> {
635        self.data.workflow.as_ref()
636    }
637
638    /// Gets the structs in the document.
639    pub fn structs(&self) -> impl Iterator<Item = (&str, &Struct)> {
640        self.data.structs.iter().map(|(n, s)| (n.as_str(), s))
641    }
642
643    /// Gets a struct in the document by name.
644    pub fn struct_by_name(&self, name: &str) -> Option<&Struct> {
645        self.data.structs.get(name)
646    }
647
648    /// Gets the analysis diagnostics for the document.
649    pub fn diagnostics(&self) -> &[Diagnostic] {
650        &self.data.diagnostics
651    }
652
653    /// Finds a scope based on a position within the document.
654    pub fn find_scope_by_position(&self, position: usize) -> Option<ScopeRef<'_>> {
655        /// Finds a scope within a collection of sorted scopes by position.
656        fn find_scope(scopes: &[Scope], position: usize) -> Option<ScopeRef<'_>> {
657            let mut index = match scopes.binary_search_by_key(&position, |s| s.span.start()) {
658                Ok(index) => index,
659                Err(index) => {
660                    // This indicates that we couldn't find a match and the match would go _before_
661                    // the first scope, so there is no containing scope.
662                    if index == 0 {
663                        return None;
664                    }
665
666                    index - 1
667                }
668            };
669
670            // We now have the index to start looking up the list of scopes
671            // We walk up the list to try to find a span that contains the position
672            loop {
673                let scope = &scopes[index];
674                if scope.span.contains(position) {
675                    return Some(ScopeRef::new(scopes, ScopeIndex(index)));
676                }
677
678                if index == 0 {
679                    return None;
680                }
681
682                index -= 1;
683            }
684        }
685
686        // Check to see if the position is contained in the workflow
687        if let Some(workflow) = &self.data.workflow {
688            if workflow.scope().span().contains(position) {
689                return find_scope(&workflow.scopes, position);
690            }
691        }
692
693        // Search for a task that might contain the position
694        let task = match self
695            .data
696            .tasks
697            .binary_search_by_key(&position, |_, t| t.scope().span().start())
698        {
699            Ok(index) => &self.data.tasks[index],
700            Err(index) => {
701                // This indicates that we couldn't find a match and the match would go _before_
702                // the first task, so there is no containing task.
703                if index == 0 {
704                    return None;
705                }
706
707                &self.data.tasks[index - 1]
708            }
709        };
710
711        if task.scope().span().contains(position) {
712            return find_scope(&task.scopes, position);
713        }
714
715        None
716    }
717}