Skip to main content

wdl_analysis/eval/
v1.rs

1//! Evaluation graphs for WDL 1.x.
2
3use std::collections::HashMap;
4use std::fmt;
5use std::fmt::Debug;
6
7use petgraph::algo::DfsSpace;
8use petgraph::algo::has_path_connecting;
9use petgraph::graph::DiGraph;
10use petgraph::graph::NodeIndex;
11use petgraph::visit::Visitable;
12use smallvec::SmallVec;
13use smallvec::smallvec;
14use wdl_ast::AstNode;
15use wdl_ast::AstToken;
16use wdl_ast::Diagnostic;
17use wdl_ast::Ident;
18use wdl_ast::SupportedVersion;
19use wdl_ast::SyntaxKind;
20use wdl_ast::SyntaxNode;
21use wdl_ast::TokenText;
22use wdl_ast::TreeNode;
23use wdl_ast::v1::CallStatement;
24use wdl_ast::v1::CommandPart;
25use wdl_ast::v1::CommandSection;
26use wdl_ast::v1::ConditionalStatement;
27use wdl_ast::v1::ConditionalStatementClause;
28use wdl_ast::v1::Decl;
29use wdl_ast::v1::Expr;
30use wdl_ast::v1::NameRefExpr;
31use wdl_ast::v1::RequirementsSection;
32use wdl_ast::v1::RuntimeSection;
33use wdl_ast::v1::ScatterStatement;
34use wdl_ast::v1::TaskDefinition;
35use wdl_ast::v1::TaskHintsSection;
36use wdl_ast::v1::TaskItem;
37use wdl_ast::v1::WorkflowDefinition;
38use wdl_ast::v1::WorkflowItem;
39use wdl_ast::v1::WorkflowStatement;
40use wdl_ast::version::V1;
41
42use crate::diagnostics::NameContext;
43use crate::diagnostics::call_conflict;
44use crate::diagnostics::name_conflict;
45use crate::diagnostics::self_referential;
46use crate::diagnostics::task_reference_cycle;
47use crate::diagnostics::unknown_name;
48use crate::diagnostics::workflow_reference_cycle;
49use crate::document::TASK_VAR_NAME;
50
51/// Represents a node in an task evaluation graph.
52#[derive(Debug, Clone)]
53pub enum TaskGraphNode<N: TreeNode = SyntaxNode> {
54    /// The node is an input.
55    Input(Decl<N>),
56    /// The node is a private decl.
57    Decl(Decl<N>),
58    /// The node is an output decl.
59    Output(Decl<N>),
60    /// The node is a command section.
61    Command(CommandSection<N>),
62    /// The node is a `runtime` section.
63    Runtime(RuntimeSection<N>),
64    /// The node is a `requirements` section.
65    Requirements(RequirementsSection<N>),
66    /// The node is a `hints` section.
67    Hints(TaskHintsSection<N>),
68}
69
70impl<N: TreeNode> TaskGraphNode<N> {
71    /// Gets the context of the name introduced by the node.
72    ///
73    /// Returns `None` if the node did not introduce a name.
74    fn context(&self) -> Option<NameContext> {
75        match self {
76            Self::Input(decl) => Some(NameContext::Input(decl.name().span())),
77            Self::Decl(decl) => Some(NameContext::Decl(decl.name().span())),
78            Self::Output(decl) => Some(NameContext::Output(decl.name().span())),
79            Self::Command(_) | Self::Runtime(_) | Self::Requirements(_) | Self::Hints(_) => None,
80        }
81    }
82
83    /// Gets the expression associated with the node.
84    ///
85    /// Returns `None` if the node has no expression.
86    fn expr(&self) -> Option<Expr<N>> {
87        match self {
88            Self::Input(decl) | Self::Decl(decl) | Self::Output(decl) => decl.expr(),
89            Self::Command(_) | Self::Runtime(_) | Self::Requirements(_) | Self::Hints(_) => None,
90        }
91    }
92}
93
94impl<N: TreeNode> fmt::Display for TaskGraphNode<N> {
95    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
96        match self {
97            Self::Input(decl) | Self::Decl(decl) | Self::Output(decl) => {
98                write!(f, "`{name}`", name = decl.name().text())
99            }
100            Self::Command(_) => write!(f, "command section"),
101            Self::Runtime(_) => write!(f, "runtime section"),
102            Self::Requirements(_) => write!(f, "requirements section"),
103            Self::Hints(_) => write!(f, "hints section"),
104        }
105    }
106}
107
108/// A builder for task evaluation graphs.
109#[derive(Debug)]
110pub struct TaskGraphBuilder<N: TreeNode = SyntaxNode> {
111    /// The map of declaration names to node indexes in the graph.
112    names: HashMap<TokenText<N::Token>, NodeIndex>,
113    /// The command node index.
114    command: Option<NodeIndex>,
115    /// The runtime node index.
116    runtime: Option<NodeIndex>,
117    /// The requirements node index.
118    requirements: Option<NodeIndex>,
119    /// The hints node index.
120    hints: Option<NodeIndex>,
121    /// Space for DFS operations when building the graph.
122    space: DfsSpace<NodeIndex, <DiGraph<TaskGraphNode<N>, ()> as Visitable>::Map>,
123}
124
125impl<N: TreeNode> TaskGraphBuilder<N> {
126    /// Builds a new task evaluation graph.
127    ///
128    /// The nodes are [`TaskGraphNode`] and the edges represent a reverse
129    /// dependency relationship (A -> B => "node A is depended on by B").
130    ///
131    /// The edge data indicates whether or not the edge is an implicit edge
132    /// between the node and the command section.
133    ///
134    /// Commands implicitly depend on all inputs, environment variables, the
135    /// requirements section, the runtime section, and the hints section.
136    ///
137    /// Outputs implicitly depend on the command section.
138    pub fn build(
139        mut self,
140        version: SupportedVersion,
141        task: &TaskDefinition<N>,
142        diagnostics: &mut Vec<Diagnostic>,
143        custom_type_present: impl Fn(&str) -> bool,
144    ) -> DiGraph<TaskGraphNode<N>, bool> {
145        // Populate the declaration types and build a name reference graph
146        let mut graph = DiGraph::default();
147        let mut saw_inputs = false;
148        let mut outputs = None;
149        for item in task.items() {
150            match item {
151                TaskItem::Input(section) if !saw_inputs => {
152                    saw_inputs = true;
153                    for decl in section.declarations() {
154                        self.add_named_node(
155                            decl.name(),
156                            TaskGraphNode::Input(decl),
157                            &mut graph,
158                            diagnostics,
159                        );
160                    }
161                }
162                TaskItem::Output(section) if outputs.is_none() => {
163                    outputs = Some(section);
164                }
165                TaskItem::Declaration(decl) => {
166                    self.add_named_node(
167                        decl.name(),
168                        TaskGraphNode::Decl(Decl::Bound(decl)),
169                        &mut graph,
170                        diagnostics,
171                    );
172                }
173                TaskItem::Command(section) if self.command.is_none() => {
174                    self.command = Some(graph.add_node(TaskGraphNode::Command(section)));
175                }
176                TaskItem::Runtime(section) if self.runtime.is_none() => {
177                    self.runtime = Some(graph.add_node(TaskGraphNode::Runtime(section)));
178                }
179                TaskItem::Requirements(section)
180                    if version >= SupportedVersion::V1(V1::Two)
181                        && self.requirements.is_none()
182                        && self.runtime.is_none() =>
183                {
184                    self.requirements = Some(graph.add_node(TaskGraphNode::Requirements(section)));
185                }
186                TaskItem::Hints(section)
187                    if version >= SupportedVersion::V1(V1::Two)
188                        && self.hints.is_none()
189                        && self.runtime.is_none() =>
190                {
191                    self.hints = Some(graph.add_node(TaskGraphNode::Hints(section)));
192                }
193                _ => continue,
194            }
195        }
196
197        // Add name reference edges before adding the outputs
198        self.add_reference_edges(version, None, &mut graph, diagnostics, &custom_type_present);
199
200        // Add the outputs
201        let count = graph.node_count();
202        if let Some(section) = outputs {
203            for decl in section.declarations() {
204                self.add_named_node(
205                    decl.name(),
206                    TaskGraphNode::Output(Decl::Bound(decl)),
207                    &mut graph,
208                    diagnostics,
209                );
210            }
211        }
212
213        // Add reference edges again, but only for the output declaration nodes
214        self.add_reference_edges(
215            version,
216            Some(count),
217            &mut graph,
218            diagnostics,
219            &custom_type_present,
220        );
221
222        // Finally, add implicit edges to and from the command
223        if let Some(command) = self.command {
224            // The command section depends on the runtime section
225            if let Some(runtime) = self.runtime {
226                graph.update_edge(runtime, command, true);
227            }
228
229            // The command section depends on the requirements section
230            if let Some(requirements) = self.requirements {
231                graph.update_edge(requirements, command, true);
232            }
233
234            // The command section depends on the hints section
235            if let Some(hints) = self.hints {
236                graph.update_edge(hints, command, true);
237            }
238
239            // The command section depends on any input or environment variable declaration
240            // All outputs depend on the command
241            for index in self.names.values() {
242                match &graph[*index] {
243                    TaskGraphNode::Input(_) => {
244                        if !graph.contains_edge(*index, command) {
245                            graph.update_edge(*index, command, true);
246                        }
247                    }
248                    TaskGraphNode::Decl(decl) if decl.env().is_some() => {
249                        if !graph.contains_edge(*index, command) {
250                            graph.update_edge(*index, command, true);
251                        }
252                    }
253                    TaskGraphNode::Output(_) => {
254                        graph.update_edge(command, *index, true);
255                    }
256                    _ => continue,
257                }
258            }
259        }
260
261        graph
262    }
263
264    /// Adds a named node to the graph.
265    fn add_named_node(
266        &mut self,
267        name: Ident<N::Token>,
268        node: TaskGraphNode<N>,
269        graph: &mut DiGraph<TaskGraphNode<N>, bool>,
270        diagnostics: &mut Vec<Diagnostic>,
271    ) -> Option<NodeIndex> {
272        // Check for conflicting nodes
273        if let Some(existing) = self.names.get(name.text()) {
274            diagnostics.push(name_conflict(
275                name.text(),
276                node.context().expect("node should have context").into(),
277                graph[*existing]
278                    .context()
279                    .expect("node should have context")
280                    .into(),
281            ));
282            return None;
283        }
284
285        let index = graph.add_node(node);
286        self.names.insert(name.hashable(), index);
287        Some(index)
288    }
289
290    /// Adds edges from task sections to declarations.
291    fn add_section_edges(
292        &mut self,
293        from: NodeIndex,
294        descendants: impl Iterator<Item = NameRefExpr<N>>,
295        allow_task_var: bool,
296        graph: &mut DiGraph<TaskGraphNode<N>, bool>,
297        diagnostics: &mut Vec<Diagnostic>,
298        custom_type_present: impl Fn(&str) -> bool,
299    ) {
300        // Add edges for any descendant name references
301        for r in descendants {
302            let name = r.name();
303
304            // Look up the name; we don't check for cycles here as decls can't
305            // reference a section.
306            match self.names.get(name.text()) {
307                Some(to) => {
308                    graph.update_edge(*to, from, false);
309                }
310                _ => {
311                    if (name.text() != TASK_VAR_NAME || !allow_task_var)
312                        && !custom_type_present(name.text())
313                    {
314                        diagnostics.push(unknown_name(name.text(), name.span()));
315                    }
316                }
317            }
318        }
319    }
320
321    /// Adds name reference edges to the graph.
322    fn add_reference_edges(
323        &mut self,
324        version: SupportedVersion,
325        skip: Option<usize>,
326        graph: &mut DiGraph<TaskGraphNode<N>, bool>,
327        diagnostics: &mut Vec<Diagnostic>,
328        custom_type_present: impl Fn(&str) -> bool,
329    ) {
330        // Populate edges for any nodes that reference other nodes by name
331        for from in graph.node_indices().skip(skip.unwrap_or(0)) {
332            match graph[from].clone() {
333                TaskGraphNode::Input(decl) | TaskGraphNode::Decl(decl) => {
334                    if let Some(expr) = decl.expr() {
335                        self.add_expr_edges(
336                            from,
337                            expr,
338                            false,
339                            graph,
340                            diagnostics,
341                            &custom_type_present,
342                        );
343                    }
344                }
345                TaskGraphNode::Output(decl) => {
346                    if let Some(expr) = decl.expr() {
347                        self.add_expr_edges(
348                            from,
349                            expr,
350                            version >= SupportedVersion::V1(V1::Two),
351                            graph,
352                            diagnostics,
353                            &custom_type_present,
354                        );
355                    }
356                }
357                TaskGraphNode::Command(section) => {
358                    // Add name references from the command section to any decls in scope
359                    let section = section.clone();
360                    for part in section.parts() {
361                        if let CommandPart::Placeholder(p) = part {
362                            self.add_section_edges(
363                                from,
364                                p.descendants(),
365                                version >= SupportedVersion::V1(V1::Two),
366                                graph,
367                                diagnostics,
368                                &custom_type_present,
369                            );
370                        }
371                    }
372                }
373                TaskGraphNode::Runtime(section) => {
374                    // Add name references from the runtime section to any decls in scope
375                    let section = section.clone();
376                    for item in section.items() {
377                        self.add_section_edges(
378                            from,
379                            item.descendants(),
380                            version >= SupportedVersion::V1(V1::Three),
381                            graph,
382                            diagnostics,
383                            &custom_type_present,
384                        );
385                    }
386                }
387                TaskGraphNode::Requirements(section) => {
388                    // Add name references from the requirements section to any decls in scope
389                    let section = section.clone();
390                    for item in section.items() {
391                        self.add_section_edges(
392                            from,
393                            item.descendants(),
394                            version >= SupportedVersion::V1(V1::Three),
395                            graph,
396                            diagnostics,
397                            &custom_type_present,
398                        );
399                    }
400                }
401                TaskGraphNode::Hints(section) => {
402                    // Add name references from the hints section to any decls in scope
403                    let section = section.clone();
404                    for item in section.items() {
405                        self.add_section_edges(
406                            from,
407                            item.descendants(),
408                            version >= SupportedVersion::V1(V1::Three),
409                            graph,
410                            diagnostics,
411                            &custom_type_present,
412                        );
413                    }
414                }
415            }
416        }
417    }
418
419    /// Adds name reference edges for an expression.
420    fn add_expr_edges(
421        &mut self,
422        from: NodeIndex,
423        expr: Expr<N>,
424        allow_task_var: bool,
425        graph: &mut DiGraph<TaskGraphNode<N>, bool>,
426        diagnostics: &mut Vec<Diagnostic>,
427        custom_type_present: impl Fn(&str) -> bool,
428    ) {
429        for r in expr.descendants::<NameRefExpr<N>>() {
430            let name = r.name();
431
432            // Only add an edge if the name is known
433            match self.names.get(name.text()) {
434                Some(to) => {
435                    // Check to see if the node is self-referential
436                    if *to == from {
437                        diagnostics.push(self_referential(
438                            name.text(),
439                            graph[from]
440                                .context()
441                                .expect("node should have context")
442                                .span(),
443                            name.span(),
444                        ));
445                        continue;
446                    }
447
448                    // Check for a dependency cycle
449                    if has_path_connecting(graph as &_, from, *to, Some(&mut self.space)) {
450                        diagnostics.push(task_reference_cycle(
451                            &graph[from],
452                            r.span(),
453                            name.text(),
454                            graph[*to]
455                                .expr()
456                                .expect("should have expr to form a cycle")
457                                .span(),
458                        ));
459                        continue;
460                    }
461
462                    graph.update_edge(*to, from, false);
463                }
464                _ => {
465                    if (name.text() != TASK_VAR_NAME || !allow_task_var)
466                        && !custom_type_present(name.text())
467                    {
468                        diagnostics.push(unknown_name(name.text(), name.span()));
469                    }
470                }
471            }
472        }
473    }
474}
475
476impl<N: TreeNode> Default for TaskGraphBuilder<N> {
477    fn default() -> Self {
478        Self {
479            names: Default::default(),
480            command: Default::default(),
481            runtime: Default::default(),
482            requirements: Default::default(),
483            hints: Default::default(),
484            space: Default::default(),
485        }
486    }
487}
488
489/// Represents a node in an workflow evaluation graph.
490#[derive(Debug, Clone)]
491pub enum WorkflowGraphNode<N: TreeNode = SyntaxNode> {
492    /// The node is an input.
493    Input(Decl<N>),
494    /// The node is a private decl.
495    Decl(Decl<N>),
496    /// The node is an output decl.
497    Output(Decl<N>),
498    /// The node is a conditional statement.
499    ///
500    /// Stores the AST node along with the exit node index.
501    Conditional(ConditionalStatement<N>, NodeIndex),
502    /// The node represents a specific clause within a conditional statement.
503    ///
504    /// Stores the clause AST node and exit node index.
505    /// This allows each clause to have its own subgraph.
506    ConditionalClause(ConditionalStatementClause<N>, NodeIndex),
507    /// The node is a scatter statement.
508    ///
509    /// Stores the AST node along with the exit node index.
510    Scatter(ScatterStatement<N>, NodeIndex),
511    /// The node is a call statement.
512    Call(CallStatement<N>),
513    /// The node is an exit of a conditional statement.
514    ///
515    /// This is a special node that is paired with each conditional statement
516    /// node.
517    ///
518    /// It is the point by which the conditional is being exited and the outputs
519    /// of the statement are introduced into the parent scope.
520    ExitConditional(ConditionalStatement<N>),
521    /// The node is an exit of a scatter statement.
522    ///
523    /// This is a special node that is paired with each scatter statement node.
524    ///
525    /// It is the point by which the scatter is being exited and the outputs of
526    /// the statement are introduced into the parent scope.
527    ExitScatter(ScatterStatement<N>),
528}
529
530impl<N: TreeNode> WorkflowGraphNode<N> {
531    /// Gets the context of the name introduced by the node.
532    ///
533    /// Returns `None` if the node did not introduce a name.
534    pub fn context(&self) -> Option<NameContext> {
535        match self {
536            Self::Input(decl) => Some(NameContext::Input(decl.name().span())),
537            Self::Decl(decl) => Some(NameContext::Decl(decl.name().span())),
538            Self::Output(decl) => Some(NameContext::Output(decl.name().span())),
539            Self::Scatter(statement, _) => {
540                Some(NameContext::ScatterVariable(statement.variable().span()))
541            }
542            Self::Call(statement) => statement
543                .alias()
544                .map(|a| NameContext::Call(a.name().span()))
545                .or_else(|| {
546                    statement
547                        .target()
548                        .names()
549                        .last()
550                        .map(|t| NameContext::Call(t.span()))
551                }),
552            Self::Conditional(..)
553            | Self::ConditionalClause(..)
554            | Self::ExitConditional(_)
555            | Self::ExitScatter(_) => None,
556        }
557    }
558
559    /// Gets the inner node representation for the workflow graph node.
560    pub fn inner(&self) -> &N {
561        match self {
562            Self::Input(decl) | Self::Output(decl) | Self::Decl(decl) => decl.inner(),
563            Self::Conditional(stmt, ..) => stmt.inner(),
564            Self::ConditionalClause(stmt, ..) => stmt.inner(),
565            Self::Scatter(stmt, ..) => stmt.inner(),
566            Self::Call(stmt) => stmt.inner(),
567            Self::ExitConditional(stmt) => stmt.inner(),
568            Self::ExitScatter(stmt) => stmt.inner(),
569        }
570    }
571}
572
573impl<N: TreeNode> fmt::Display for WorkflowGraphNode<N> {
574    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
575        match self {
576            Self::Input(decl) | Self::Decl(decl) | Self::Output(decl) => {
577                write!(f, "`{name}`", name = decl.name().text())
578            }
579            Self::Scatter(statement, _) => {
580                write!(f, "`{name}`", name = statement.variable().text())
581            }
582            Self::Call(statement) => write!(
583                f,
584                "`{name}`",
585                name = statement
586                    .alias()
587                    .map(|a| a.name())
588                    .or_else(|| statement.target().names().last())
589                    .expect("should have name")
590                    .text()
591            ),
592            Self::Conditional(..) => write!(f, "conditional expression"),
593            Self::ConditionalClause(clause, _) => {
594                write!(f, "conditional clause ({})", clause.kind())
595            }
596            Self::ExitConditional(_) | Self::ExitScatter(_) => write!(f, "exit"),
597        }
598    }
599}
600
601/// The number of declarations to store in each [`SmallVec`].
602///
603/// You can think of this number as "what is the maximum reasonable number of
604/// clauses a conditional might have". You want the size to be large enough that
605/// _most_ conditionals will fit in it (avoiding spilling the references to the
606/// heap) but _small_ enough that it doesn't put unnecessary pressure on the
607/// stack size.
608///
609/// We chose `10` because it is fairly large while not being overly burdensome
610/// on the stack.
611const SMALLVEC_DECLS_LEN: usize = 10;
612
613/// Represents a builder of workflow evaluation graphs.
614#[derive(Debug)]
615pub struct WorkflowGraphBuilder<N: TreeNode = SyntaxNode> {
616    /// The map of declaration names to node indexes in the graph.
617    names: HashMap<TokenText<N::Token>, SmallVec<[NodeIndex; SMALLVEC_DECLS_LEN]>>,
618    /// A stack of scatter variable names.
619    variables: Vec<Ident<N::Token>>,
620    /// A map of AST syntax nodes to their entry and exit nodes in the graph.
621    ///
622    /// This is used to add edges to the graph for references to names that are
623    /// nested inside of conditional or scatter statements.
624    entry_exits: HashMap<N, (NodeIndex, NodeIndex)>,
625    /// Space for DFS operations when building the graph.
626    space: DfsSpace<NodeIndex, <DiGraph<WorkflowGraphNode<N>, ()> as Visitable>::Map>,
627    /// The common ancestor finder used when building the graph.
628    ancestor_finder: CommonAncestorFinder<N>,
629}
630
631impl<N: TreeNode> WorkflowGraphBuilder<N> {
632    /// Builds a new workflow evaluation graph.
633    ///
634    /// The nodes are [`WorkflowGraphNode`] and the edges represent a reverse
635    /// dependency relationship (A -> B => "node A is depended on by B").
636    pub fn build(
637        mut self,
638        workflow: &WorkflowDefinition<N>,
639        diagnostics: &mut Vec<Diagnostic>,
640        input_present: impl Fn(&str) -> bool,
641        custom_type_present: impl Fn(&str) -> bool,
642    ) -> DiGraph<WorkflowGraphNode<N>, ()> {
643        // Populate the declaration types and build a name reference graph
644        let mut graph = DiGraph::new();
645        let mut saw_inputs = false;
646        let mut outputs = None;
647        for item in workflow.items() {
648            match item {
649                WorkflowItem::Input(section) if !saw_inputs => {
650                    saw_inputs = true;
651                    for decl in section.declarations() {
652                        self.add_named_node(
653                            decl.name(),
654                            WorkflowGraphNode::Input(decl),
655                            &mut graph,
656                            diagnostics,
657                        );
658                    }
659                }
660                WorkflowItem::Output(section) if outputs.is_none() => {
661                    outputs = Some(section);
662                }
663                WorkflowItem::Conditional(statement) => {
664                    self.add_workflow_statement(
665                        WorkflowStatement::Conditional(statement),
666                        None,
667                        &mut graph,
668                        diagnostics,
669                    );
670                }
671                WorkflowItem::Scatter(statement) => {
672                    self.add_workflow_statement(
673                        WorkflowStatement::Scatter(statement),
674                        None,
675                        &mut graph,
676                        diagnostics,
677                    );
678                }
679                WorkflowItem::Call(statement) => {
680                    self.add_workflow_statement(
681                        WorkflowStatement::Call(statement),
682                        None,
683                        &mut graph,
684                        diagnostics,
685                    );
686                }
687                WorkflowItem::Declaration(decl) => {
688                    self.add_workflow_statement(
689                        WorkflowStatement::Declaration(decl),
690                        None,
691                        &mut graph,
692                        diagnostics,
693                    );
694                }
695                _ => continue,
696            }
697        }
698
699        // Add name reference edges before adding the outputs
700        self.add_reference_edges(
701            None,
702            &mut graph,
703            diagnostics,
704            &input_present,
705            &custom_type_present,
706        );
707
708        let count = graph.node_count();
709        if let Some(section) = outputs {
710            for decl in section.declarations() {
711                self.add_named_node(
712                    decl.name(),
713                    WorkflowGraphNode::Output(Decl::Bound(decl)),
714                    &mut graph,
715                    diagnostics,
716                );
717            }
718        }
719
720        // Add reference edges again, but only for the output declaration nodes
721        self.add_reference_edges(
722            Some(count),
723            &mut graph,
724            diagnostics,
725            &input_present,
726            &custom_type_present,
727        );
728        graph
729    }
730
731    /// Adds nodes from a workflow statement to the graph.
732    fn add_workflow_statement(
733        &mut self,
734        statement: WorkflowStatement<N>,
735        parent_entry_exit: Option<(NodeIndex, NodeIndex)>,
736        graph: &mut DiGraph<WorkflowGraphNode<N>, ()>,
737        diagnostics: &mut Vec<Diagnostic>,
738    ) {
739        let entry_exit = match statement {
740            WorkflowStatement::Conditional(statement) => {
741                // Create the exit node for the entire conditional statement
742                let exit = graph.add_node(WorkflowGraphNode::ExitConditional(statement.clone()));
743                // Create the main entry node
744                let entry = graph.add_node(WorkflowGraphNode::Conditional(statement.clone(), exit));
745
746                graph.update_edge(entry, exit, ());
747
748                self.entry_exits
749                    .insert(statement.inner().clone(), (entry, exit));
750
751                // Create a separate subgraph for each clause
752                for clause in statement.clauses() {
753                    // Create entry node for this specific clause
754                    let clause_entry =
755                        graph.add_node(WorkflowGraphNode::ConditionalClause(clause.clone(), exit));
756
757                    // Connect main entry to clause entry node
758                    graph.update_edge(entry, clause_entry, ());
759                    // Connect clause entry to the condition's exit node
760                    graph.update_edge(clause_entry, exit, ());
761
762                    // Store the clause's entry/exit nodes for its statements
763                    self.entry_exits
764                        .insert(clause.inner().clone(), (clause_entry, exit));
765
766                    // Add all statements within this clause
767                    for statement in clause.statements() {
768                        self.add_workflow_statement(
769                            statement,
770                            Some((clause_entry, exit)),
771                            graph,
772                            diagnostics,
773                        );
774                    }
775                }
776
777                Some((entry, exit))
778            }
779            WorkflowStatement::Scatter(statement) => {
780                // Create the entry and exit nodes for the scatter statement
781                // The exit node always depends on the entry node
782                let exit = graph.add_node(WorkflowGraphNode::ExitScatter(statement.clone()));
783                let entry = graph.add_node(WorkflowGraphNode::Scatter(statement.clone(), exit));
784                graph.update_edge(entry, exit, ());
785                self.entry_exits
786                    .insert(statement.inner().clone(), (entry, exit));
787
788                // Push the scatter variable onto the stack if it isn't already conflicting
789                let variable = statement.variable();
790                let pushed = match self.names.get(variable.text()) {
791                    Some(existing) => {
792                        // SAFETY: if this exists in the map, there will always
793                        // be at least one element.
794                        let first = existing[0];
795                        diagnostics.push(name_conflict(
796                            variable.text(),
797                            NameContext::ScatterVariable(variable.span()).into(),
798                            graph[first]
799                                .context()
800                                .expect("node should have context")
801                                .into(),
802                        ));
803                        false
804                    }
805                    _ => {
806                        self.variables.push(variable);
807                        true
808                    }
809                };
810
811                // Add all of the statement's statements
812                for statement in statement.statements() {
813                    self.add_workflow_statement(statement, Some((entry, exit)), graph, diagnostics);
814                }
815
816                if pushed {
817                    self.variables.pop();
818                }
819
820                Some((entry, exit))
821            }
822            WorkflowStatement::Call(statement) => {
823                let name = statement.alias().map(|a| a.name()).unwrap_or_else(|| {
824                    statement
825                        .target()
826                        .names()
827                        .last()
828                        .expect("expected a last call target name")
829                });
830
831                self.add_named_node(
832                    name,
833                    WorkflowGraphNode::Call(statement.clone()),
834                    graph,
835                    diagnostics,
836                )
837                // The calls's node is both the entry and exit nodes
838                .map(|i| (i, i))
839            }
840            WorkflowStatement::Declaration(decl) => self
841                .add_named_node(
842                    decl.name(),
843                    WorkflowGraphNode::Decl(Decl::Bound(decl)),
844                    graph,
845                    diagnostics,
846                )
847                // The declaration's node is both the entry and exit nodes
848                .map(|i| (i, i)),
849        };
850
851        // Add (reverse) dependency edges to parent entry from child entry and to child
852        // exit from parent exit
853        if let (Some((entry, exit)), Some((parent_entry, parent_exit))) =
854            (entry_exit, parent_entry_exit)
855        {
856            graph.update_edge(parent_entry, entry, ());
857            graph.update_edge(exit, parent_exit, ());
858        }
859    }
860
861    /// Adds a named node to the graph.
862    fn add_named_node(
863        &mut self,
864        name: Ident<N::Token>,
865        node: WorkflowGraphNode<N>,
866        graph: &mut DiGraph<WorkflowGraphNode<N>, ()>,
867        diagnostics: &mut Vec<Diagnostic>,
868    ) -> Option<NodeIndex> {
869        // Check for a conflicting name, either from a declaration or from a scatter
870        // variable
871        let (context, cont) = match self.names.get(name.text()) {
872            Some(existing) => {
873                let mut conflicting_context = None;
874
875                for idx in existing {
876                    let existing = &graph[*idx];
877
878                    // Allow conditionals where the names are duplicated across
879                    // clauses but not within them.
880                    if let (Some(existing_parent), Some(new_parent)) =
881                        (existing.inner().parent(), node.inner().parent())
882                        && let (Some(existing_grandparent), Some(new_grandparent)) =
883                            (existing_parent.parent(), new_parent.parent())
884                        && matches!(
885                            existing_grandparent.kind(),
886                            SyntaxKind::ConditionalStatementNode
887                        )
888                        && existing_parent != new_parent
889                        && existing_grandparent == new_grandparent
890                    {
891                        continue;
892                    }
893
894                    conflicting_context = existing.context();
895                    break;
896                }
897
898                if let Some(context) = conflicting_context {
899                    (Some(context), false)
900                } else {
901                    (None, true)
902                }
903            }
904            _ => {
905                match self.variables.iter().find(|i| i.text() == name.text()) {
906                    Some(existing) => {
907                        // Conflict with a scatter variable; we continue to add the node so that any
908                        // declaration overrides the scatter variable
909                        (Some(NameContext::ScatterVariable(existing.span())), true)
910                    }
911                    _ => {
912                        // No conflict
913                        (None, true)
914                    }
915                }
916            }
917        };
918
919        // Check to see if a diagnostic should be added
920        if let Some(context) = context {
921            let diagnostic = match &node {
922                WorkflowGraphNode::Call(call) => {
923                    call_conflict(&name, context, call.alias().is_none())
924                }
925                _ => name_conflict(
926                    name.text(),
927                    node.context().expect("node should have context").into(),
928                    context.into(),
929                ),
930            };
931
932            diagnostics.push(diagnostic);
933
934            if !cont {
935                return None;
936            }
937        }
938
939        let index = graph.add_node(node);
940        self.names.entry(name.hashable()).or_default().push(index);
941        Some(index)
942    }
943
944    /// Adds name reference edges to the graph.
945    fn add_reference_edges(
946        &mut self,
947        skip: Option<usize>,
948        graph: &mut DiGraph<WorkflowGraphNode<N>, ()>,
949        diagnostics: &mut Vec<Diagnostic>,
950        input_present: impl Fn(&str) -> bool,
951        custom_type_present: impl Fn(&str) -> bool,
952    ) {
953        // Populate edges for any nodes that reference other nodes by name
954        for from in graph.node_indices().skip(skip.unwrap_or(0)) {
955            match graph[from].clone() {
956                WorkflowGraphNode::Input(decl) => {
957                    // Only add edges for default expressions if the input wasn't provided
958                    if !input_present(decl.name().text())
959                        && let Some(expr) = decl.expr()
960                    {
961                        self.add_expr_edges(from, expr, graph, diagnostics, &custom_type_present);
962                    }
963                }
964
965                WorkflowGraphNode::Decl(decl) | WorkflowGraphNode::Output(decl) => {
966                    if let Some(expr) = decl.expr() {
967                        self.add_expr_edges(from, expr, graph, diagnostics, &custom_type_present);
968                    }
969                }
970                WorkflowGraphNode::Conditional(statement, _) => {
971                    for clause in statement.clauses() {
972                        let Some(expr) = clause.expr() else { continue };
973                        self.add_expr_edges(from, expr, graph, diagnostics, &custom_type_present);
974                    }
975                }
976                WorkflowGraphNode::ConditionalClause(..) => {
977                    // The expression edges for conditional clauses are handled
978                    // in the [`WorkflowGraphNode::Conditional`] case.
979                }
980                WorkflowGraphNode::Scatter(statement, _) => {
981                    self.add_expr_edges(
982                        from,
983                        statement.expr(),
984                        graph,
985                        diagnostics,
986                        &custom_type_present,
987                    );
988                }
989                WorkflowGraphNode::Call(statement) => {
990                    // Add edges for the input expressions
991                    // If an input does not have an expression, add an edge to the name
992                    for input in statement.inputs() {
993                        let name = input.name();
994                        match input.expr() {
995                            Some(expr) => {
996                                self.add_expr_edges(
997                                    from,
998                                    expr,
999                                    graph,
1000                                    diagnostics,
1001                                    &custom_type_present,
1002                                );
1003                            }
1004                            _ => {
1005                                if let Some(nodes) =
1006                                    self.find_nodes_by_name(name.text(), input.inner().clone())
1007                                {
1008                                    // Check for a dependency cycle
1009                                    for to in nodes {
1010                                        if has_path_connecting(
1011                                            graph as &_,
1012                                            from,
1013                                            to,
1014                                            Some(&mut self.space),
1015                                        ) {
1016                                            diagnostics.push(workflow_reference_cycle(
1017                                                &graph[from],
1018                                                name.span(),
1019                                                name.text(),
1020                                                graph[to]
1021                                                    .context()
1022                                                    .expect("node should have context")
1023                                                    .span(),
1024                                            ));
1025                                            continue;
1026                                        }
1027
1028                                        self.add_dependency_edge(from, to, graph);
1029                                    }
1030                                }
1031                            }
1032                        }
1033                    }
1034
1035                    // Add edges to the "after" calls
1036                    for after in statement.after() {
1037                        let name = after.name();
1038                        if let Some(nodes) =
1039                            self.find_nodes_by_name(name.text(), after.inner().clone())
1040                        {
1041                            for to in nodes {
1042                                // Check for a dependency cycle
1043                                if has_path_connecting(graph as &_, from, to, Some(&mut self.space))
1044                                {
1045                                    diagnostics.push(workflow_reference_cycle(
1046                                        &graph[from],
1047                                        name.span(),
1048                                        name.text(),
1049                                        graph[to]
1050                                            .context()
1051                                            .expect("node should have context")
1052                                            .span(),
1053                                    ));
1054                                    continue;
1055                                }
1056
1057                                self.add_dependency_edge(from, to, graph);
1058                            }
1059                        }
1060                    }
1061                }
1062                WorkflowGraphNode::ExitConditional(_) | WorkflowGraphNode::ExitScatter(_) => {
1063                    continue;
1064                }
1065            }
1066        }
1067    }
1068
1069    /// Adds name reference edges for an expression.
1070    fn add_expr_edges(
1071        &mut self,
1072        from: NodeIndex,
1073        expr: Expr<N>,
1074        graph: &mut DiGraph<WorkflowGraphNode<N>, ()>,
1075        diagnostics: &mut Vec<Diagnostic>,
1076        custom_type_present: impl Fn(&str) -> bool,
1077    ) {
1078        for r in expr.inner().descendants().filter_map(NameRefExpr::cast) {
1079            let name = r.name();
1080
1081            // Only add an edge if the name is known
1082            match self.find_nodes_by_name(name.text(), expr.inner().clone()) {
1083                Some(nodes) => {
1084                    for to in nodes {
1085                        // Check to see if the node is self-referential
1086                        if to == from {
1087                            diagnostics.push(self_referential(
1088                                name.text(),
1089                                graph[from]
1090                                    .context()
1091                                    .expect("node should have a context")
1092                                    .span(),
1093                                name.span(),
1094                            ));
1095                            continue;
1096                        }
1097
1098                        // Check for a dependency cycle
1099                        if has_path_connecting(graph as &_, from, to, Some(&mut self.space)) {
1100                            diagnostics.push(workflow_reference_cycle(
1101                                &graph[from],
1102                                r.span(),
1103                                name.text(),
1104                                graph[to]
1105                                    .context()
1106                                    .expect("node should have context")
1107                                    .span(),
1108                            ));
1109                            continue;
1110                        }
1111
1112                        self.add_dependency_edge(from, to, graph);
1113                    }
1114                }
1115                _ => {
1116                    // Check if name points to a custom type(a struct or an enum).
1117                    if !custom_type_present(name.text()) {
1118                        diagnostics.push(unknown_name(name.text(), name.span()));
1119                    }
1120                }
1121            }
1122        }
1123    }
1124
1125    /// Adds a dependency edge between two nodes.
1126    ///
1127    /// Dependency edges can only be formed between nodes at the same "scope".
1128    ///
1129    /// This works by walking up the AST ancestors looking for a common ancestor
1130    /// (A) of the two nodes.
1131    ///
1132    /// For the child of A that is an ancestor of `to` (or `to` itself), we use
1133    /// the exit node of that child if there is one.
1134    ///
1135    /// For the child of A this is an ancestor of `from` (or `from` itself), we
1136    /// use the entry node of that child if there is one.
1137    ///
1138    /// If either child does not have an entry/exit node, the original nodes are
1139    /// used.
1140    fn add_dependency_edge(
1141        &mut self,
1142        from: NodeIndex,
1143        to: NodeIndex,
1144        graph: &mut DiGraph<WorkflowGraphNode<N>, ()>,
1145    ) {
1146        assert!(from != to, "cannot add a self dependency edge");
1147
1148        let (from, to) = match self.ancestor_finder.find_children_of_common_ancestor(
1149            graph[from].inner().ancestors(),
1150            graph[to].inner().ancestors(),
1151            SyntaxKind::WorkflowDefinitionNode,
1152        ) {
1153            Some((f, t)) => {
1154                let from = self
1155                    .entry_exits
1156                    .get(&f)
1157                    .map(|(entry, _)| *entry)
1158                    .unwrap_or(from);
1159                let to = self
1160                    .entry_exits
1161                    .get(&t)
1162                    .map(|(_, exit)| *exit)
1163                    .unwrap_or(to);
1164                (from, to)
1165            }
1166            _ => (from, to),
1167        };
1168
1169        if from == to {
1170            // No need to add an edge when the entry and exit are the same node
1171            // This can occur for scatter variables referenced within the scatter body
1172            return;
1173        }
1174
1175        // Add the actual edge in reverse order
1176        graph.update_edge(to, from, ());
1177    }
1178
1179    /// Finds a node in the graph by name for the referencing expression.
1180    ///
1181    /// This takes into account finding a scatter variable that's in scope.
1182    fn find_nodes_by_name(
1183        &self,
1184        name: &str,
1185        expr: N,
1186    ) -> Option<SmallVec<[NodeIndex; SMALLVEC_DECLS_LEN]>> {
1187        // We need to walk up the parent chain looking for a scatter variable with a
1188        // matching name before looking at names in scope; a scatter variable may shadow
1189        // names declared outside of it, but an inner declaration cannot shadow an outer
1190        // scatter variable
1191        let mut current = expr;
1192        while let Some(parent) = current.parent() {
1193            if let SyntaxKind::ScatterStatementNode = parent.kind() {
1194                let statement = ScatterStatement::cast(parent.clone()).expect("node should cast");
1195                let variable = statement.variable();
1196                if variable.text() == name {
1197                    // Return the entry node for the scatter statement
1198                    return Some(smallvec![self.entry_exits[&parent].0]);
1199                }
1200            }
1201
1202            current = parent;
1203        }
1204
1205        // If the name came from a declaration or call, return the node
1206        if let Some(result) = self.names.get(name) {
1207            return Some(result.to_owned());
1208        }
1209
1210        None
1211    }
1212}
1213
1214impl<N: TreeNode> Default for WorkflowGraphBuilder<N> {
1215    fn default() -> Self {
1216        Self {
1217            names: Default::default(),
1218            variables: Default::default(),
1219            entry_exits: Default::default(),
1220            space: Default::default(),
1221            ancestor_finder: Default::default(),
1222        }
1223    }
1224}
1225
1226/// A helper for finding the children of a common ancestor in the AST.
1227///
1228/// This exists so we can reuse previously allocated space when adding
1229/// dependency edges.
1230#[derive(Debug)]
1231struct CommonAncestorFinder<N: TreeNode = SyntaxNode> {
1232    /// The stack of ancestors for the `first` node.
1233    first: Vec<N>,
1234    /// The stack of ancestors for the `second` node.
1235    second: Vec<N>,
1236}
1237
1238impl<N: TreeNode> CommonAncestorFinder<N> {
1239    /// Finds the children of a common ancestor in two list of ancestors.
1240    fn find_children_of_common_ancestor(
1241        &mut self,
1242        first: impl Iterator<Item = N>,
1243        second: impl Iterator<Item = N>,
1244        stop: SyntaxKind,
1245    ) -> Option<(N, N)> {
1246        self.first.clear();
1247        for ancestor in first {
1248            self.first.push(ancestor.clone());
1249            if ancestor.kind() == stop {
1250                break;
1251            }
1252        }
1253
1254        self.second.clear();
1255        for ancestor in second {
1256            self.second.push(ancestor.clone());
1257            if ancestor.kind() == stop {
1258                break;
1259            }
1260        }
1261
1262        for (first, second) in self.first.iter().rev().zip(self.second.iter().rev()) {
1263            if first == second {
1264                continue;
1265            }
1266
1267            return Some((first.clone(), second.clone()));
1268        }
1269
1270        None
1271    }
1272}
1273
1274impl<N: TreeNode> Default for CommonAncestorFinder<N> {
1275    fn default() -> Self {
1276        Self {
1277            first: Default::default(),
1278            second: Default::default(),
1279        }
1280    }
1281}
1282
1283#[cfg(test)]
1284mod test {
1285    use wdl_ast::Document;
1286
1287    use super::*;
1288
1289    #[test]
1290    fn test_input_dependency_handling() {
1291        let source = r#"
1292        version 1.1
1293
1294        task my_task {
1295            input {
1296                Int i
1297            }
1298
1299            command <<<>>>
1300
1301            output {
1302                Int out = i
1303            }
1304        }
1305
1306        workflow foo {
1307            input {
1308                Int x = 10
1309                Int y = t1.out
1310            }
1311
1312            call my_task as t1 { input: i = x }
1313            call my_task as t2 { input: i = y }
1314        }
1315        "#;
1316
1317        let (document, diagnostics) = Document::parse(source, None);
1318        assert!(
1319            diagnostics.is_empty(),
1320            "parsing should succeed without diagnostics"
1321        );
1322
1323        let workflow = document
1324            .ast()
1325            .into_v1()
1326            .expect("document should be v1")
1327            .workflows()
1328            .next()
1329            .expect("document should have a workflow");
1330
1331        let mut diagnostics = Vec::new();
1332
1333        // Testing without providing inputs i.e. static analysis
1334        let graph = WorkflowGraphBuilder::default().build(
1335            &workflow,
1336            &mut diagnostics,
1337            |_| false,
1338            |_| false,
1339        );
1340
1341        let t1_out = graph
1342            .node_indices()
1343            .find(|i| {
1344                if let WorkflowGraphNode::Call(call) = &graph[*i] {
1345                    call.alias().map(|a| a.name().text().to_string()) == Some("t1".to_string())
1346                } else {
1347                    false
1348                }
1349            })
1350            .expect("t1 node not found");
1351
1352        let y = graph
1353            .node_indices()
1354            .find(|i| {
1355                if let WorkflowGraphNode::Input(input) = &graph[*i] {
1356                    input.name().text() == "y"
1357                } else {
1358                    false
1359                }
1360            })
1361            .expect("y node not found");
1362
1363        assert!(
1364            graph.contains_edge(t1_out, y),
1365            "y should depend on t1.out when input 'y' is not provided"
1366        );
1367
1368        let y_input = graph
1369            .node_indices()
1370            .find(|i| {
1371                if let WorkflowGraphNode::Input(input) = &graph[*i] {
1372                    input.name().text() == "y"
1373                } else {
1374                    false
1375                }
1376            })
1377            .expect("y node not found");
1378
1379        let t2 = graph
1380            .node_indices()
1381            .find(|i| {
1382                if let WorkflowGraphNode::Call(call) = &graph[*i] {
1383                    call.alias().map(|a| a.name().text().to_string()) == Some("t2".to_string())
1384                } else {
1385                    false
1386                }
1387            })
1388            .expect("t2 node not found");
1389
1390        assert!(graph.contains_edge(y_input, t2), "t2 should depend on y");
1391
1392        // Testing with providing input y i.e. runtime analysis - case for wdl_engine
1393        let mut diagnostics = Vec::new();
1394        let graph = WorkflowGraphBuilder::default().build(
1395            &workflow,
1396            &mut diagnostics,
1397            |name| name == "y",
1398            |_| false,
1399        );
1400
1401        assert!(
1402            !graph.contains_edge(t1_out, y),
1403            "y should not depend on t1.out when input 'y' is provided"
1404        );
1405
1406        assert!(
1407            graph.contains_edge(y_input, t2),
1408            "t2 should depend on y even when input y is provided"
1409        );
1410    }
1411}