dialectic_compiler/
cfg.rs

1//! The control flow graph representation of a session type.
2
3use {
4    proc_macro2::Span,
5    std::{collections::HashSet, ops, rc::Rc},
6    syn::{Error, Type},
7    thunderdome::{Arena, Index},
8};
9
10use crate::{flow::FlowAnalysis, target::Target, CompileError, Spanned};
11
12/// A single entry for a node in the CFG.
13#[derive(Debug, Clone)]
14pub struct CfgNode {
15    /// The variant of this node.
16    pub expr: Ir,
17    /// The continuation of the node, if present. Absence of a continuation indicates the `Done` or
18    /// "halting" continuation.
19    pub next: Option<Index>,
20    /// The associated syntactic span of this node. If an error is emitted regarding this node, this
21    /// span may be used to correctly map it to a location in the surface syntax.
22    pub span: Span,
23    /// "Machine-generated" nodes are allowed to be unreachable. This is used when automatically
24    /// inserting continues when resolving scopes, so that if a continue is inserted which isn't
25    /// actually reachable, an error isn't emitted.
26    pub allow_unreachable: bool,
27    /// Nodes keep a hashset of errors in order to deduplicate any errors which are emitted multiple
28    /// times for the same node. Errors are traversed when code is emitted during
29    /// [`Cfg::generate_target`].
30    pub errors: HashSet<CompileError>,
31}
32
33/// The "kind" of a CFG node. CFG nodes have additional data stored in [`CfgNode`] which is always
34/// the same types and fields for every node, so we separate into the `Ir` variant and `CfgNode`
35/// wrapper/entry struct.
36#[derive(Debug, Clone)]
37pub enum Ir {
38    /// Indicating receiving a value of some type.
39    Recv(Type),
40    /// Indicating sending a value of some type.
41    Send(Type),
42    /// `Call` nodes act somewhat like a call/cc, and run their body continuation in the same scope
43    /// as they are called all the way to "Done" before running their own continuation.
44    Call(Option<Index>),
45    /// `Choose` nodes have a list of continuations which supersede their "next" pointer. Before
46    /// scope resolution, these continuations may be extended by their "implicit" subsequent
47    /// continuation, which is stored in the "next" pointer of the node. The scope resolution pass
48    /// "lowers" this next pointer continuation into the arms of the `Choose`, and so after scope
49    /// resolution all `Choose` nodes' next pointers should be `None`.
50    Choose(Vec<Option<Index>>),
51    /// Like `Choose`, `Offer` nodes have a list of choices, and after scope resolution have no
52    /// continuation.
53    Offer(Vec<Option<Index>>),
54    /// `Split` nodes have a transmit-only half and a receive-only half. The nodes' semantics are
55    /// similar to `Call`.
56    Split {
57        /// The transmit-only half.
58        tx_only: Option<Index>,
59        /// The receive-only half.
60        rx_only: Option<Index>,
61    },
62    /// Early on, loops *may* have a body; however, after scope resolution, all loops should have
63    /// their bodies be `Some`. So post scope resolution, this field may be unwrapped.
64    Loop(Option<Index>),
65    /// `Break` nodes directly reference the loop that they break. They can be considered as a
66    /// direct reference to the "next" pointer of the referenced loop node.
67    Break(Index),
68    /// Like break nodes, `Continue` nodes directly reference the loop that they continue.
69    /// Semantically they can be considered a reference to the body of the loop, but as they are a
70    /// special construct in the target language, we don't resolve them that way.
71    Continue(Index),
72    /// A "directly injected" type.
73    Type(Type),
74    /// Emitted when we need to have a node to put errors on but need to not reason about its
75    /// behavior.
76    Error,
77}
78
79impl CfgNode {
80    /// Shorthand for creating a [`CfgNode`] with a bunch of default values for its fields, which
81    /// you normally want, as well as a default span.
82    fn singleton(expr: Ir) -> Self {
83        Self {
84            expr,
85            next: None,
86            span: Span::call_site(),
87            allow_unreachable: false,
88            errors: HashSet::new(),
89        }
90    }
91
92    /// Shorthand for creating a [`CfgNode`] similarly to [`CfgNode::singleton`], but with an
93    /// associated span.
94    fn spanned(expr: Ir, span: Span) -> Self {
95        Self {
96            expr,
97            next: None,
98            span,
99            allow_unreachable: false,
100            errors: HashSet::new(),
101        }
102    }
103}
104
105/// A cfg of CFG nodes acting as a context for a single compilation unit.
106#[derive(Debug)]
107pub struct Cfg {
108    arena: Arena<CfgNode>,
109}
110
111impl ops::Index<Index> for Cfg {
112    type Output = CfgNode;
113
114    fn index(&self, index: Index) -> &Self::Output {
115        &self.arena[index]
116    }
117}
118
119impl ops::IndexMut<Index> for Cfg {
120    fn index_mut(&mut self, index: Index) -> &mut Self::Output {
121        &mut self.arena[index]
122    }
123}
124
125impl Default for Cfg {
126    fn default() -> Self {
127        Cfg::new()
128    }
129}
130
131impl Cfg {
132    /// Construct an empty control flow graph.
133    pub fn new() -> Self {
134        Self {
135            arena: Arena::new(),
136        }
137    }
138
139    /// Insert a constructed `CfgNode` into the CFG, and get back its newly minted index.
140    pub fn insert(&mut self, node: CfgNode) -> Index {
141        self.arena.insert(node)
142    }
143
144    /// Insert an `Error` node as a shim.
145    pub fn insert_error_at(&mut self, node: Index, kind: CompileError) {
146        self[node].errors.insert(kind);
147    }
148
149    /// Create a dummy `Error` node, as a stand-in for a node which should have been generated but
150    /// for some reason cannot be.
151    pub fn create_error(&mut self, kind: CompileError, span: Span) -> Index {
152        let mut errors = HashSet::new();
153        errors.insert(kind);
154        self.insert(CfgNode {
155            expr: Ir::Error,
156            next: None,
157            span,
158            errors,
159
160            // This node is actually usually generated during parsing for things like a `break` or
161            // `continue`. So, we actually do want to treat it as if the user wrote it themselves.
162            allow_unreachable: false,
163        })
164    }
165
166    /// Create a new node containing only this expression with no next-node link.
167    pub fn singleton(&mut self, expr: Ir) -> Index {
168        self.insert(CfgNode::singleton(expr))
169    }
170
171    /// Create a new node w/ an assigned span containing only this expression with no next-node
172    /// link.
173    pub fn spanned(&mut self, expr: Ir, span: Span) -> Index {
174        self.insert(CfgNode::spanned(expr, span))
175    }
176
177    /// The purpose of the scope resolution pass is to transform "implicit" continuations into
178    /// "explicit" continuations. For example, consider the session type `choose { ... }; send ()`.
179    /// In this type, the first parsing of the block will result in a `Choose` node with its
180    /// continuation/"next" pointer set to point to a `Send(())` node. However, in order to ensure
181    /// that the arms of the choose construct don't have to worry about the continuation that comes
182    /// "after" *in a higher/parent scope,* we have the resolve_scopes pass to "lower" this
183    /// continuation which implicitly follows every arm of the `Choose`, into becoming the
184    /// continuation of every relevant arm of the `Choose`. This does have some special cases, for
185    /// example we don't want to change or set a next continuation for a `Break` node or `Continue`
186    /// node.
187    pub fn resolve_scopes(&mut self, node: Option<Index>) {
188        // Depth-first once-only traversal of CFG, skipping nodes we've already seen
189        let mut visited = HashSet::new();
190
191        // The stack tracks pairs of implicit continuations (the absence of which indicates the
192        // `Done` continuation), and indices which might have that continuation or have children
193        // which might have that continuation. We push newly discovered nodes onto this stack, and
194        // pop from it to drive the traversal. We initialize the stack with the root node, if it is
195        // present.
196        let mut stack = vec![];
197        stack.extend(node.map(|root| (None, root)));
198
199        while let Some((implicit_cont, node)) = stack.pop() {
200            // "Follow" a node by checking to see if we have visited it, and if not, pushing it on
201            // to the stack to be visited later along with the supplied scoped implicit
202            // continuation.
203            let mut follow = |implicit_cont, node| {
204                if let Some(node) = node {
205                    if visited.insert(node) {
206                        stack.push((implicit_cont, node));
207                    }
208                }
209            };
210
211            // This match is followed by a statement which checks for a continuation and adds it to
212            // the queue if it finds one. As such, any branch which needs specifically to not check
213            // its continuation and recursively resolve scopes inside it, will be ended with a
214            // `continue`, to skip this recursion.
215            //
216            // Specifically this is *very* important for nodes such as `Offer` and `Choose`, which
217            // will no longer have a continuation at all after this pass, as their continuations
218            // will be "lifted" into each arm of the node.
219            let CfgNode { expr, next, .. } = &mut self[node];
220            match expr {
221                Ir::Recv(_)
222                | Ir::Send(_)
223                | Ir::Type(_)
224                | Ir::Break(_)
225                | Ir::Continue(_)
226                | Ir::Error => {}
227
228                Ir::Call(child) => {
229                    // `call` expressions evaluate until done; there is no implicit continuation
230                    // this is why we pass the `None` implicit continuation to `follow`
231                    follow(None, *child);
232                }
233                Ir::Split { tx_only, rx_only } => {
234                    // `split` expressions evaluate until done; there is no implicit continuation
235                    // this is why we pass the `None` implicit continuation to `follow`
236                    follow(None, *tx_only);
237                    follow(None, *rx_only);
238                }
239                Ir::Choose(choices) | Ir::Offer(choices) => {
240                    // Inline the current implicit continuation into the next pointer of the node,
241                    // if it is `Some`
242                    follow(implicit_cont, *next);
243                    // *Take* the next pointer out so that it is now None, as we are eliminating the
244                    // continuation of this node
245                    let new_implicit_cont = next.take().or(implicit_cont);
246
247                    // Follow each arm of the `choose`/`offer` using the new implicit continuation
248                    for choice in choices.iter_mut() {
249                        // If the choice is `Done`, we need to reassign it to the new implicit
250                        // continuation. Otherwise, we follow it.
251                        if choice.is_some() {
252                            follow(new_implicit_cont, *choice);
253                        } else {
254                            *choice = new_implicit_cont;
255                        }
256                    }
257                }
258                Ir::Loop(body) => {
259                    let body = *body;
260                    let continue0 = self.singleton(Ir::Continue(node));
261                    // We generated this without knowing for sure whether it is reachable or not, so
262                    // mark it machine-generated so that it doesn't trigger an unreachable code
263                    // error.
264                    self[continue0].allow_unreachable = true;
265                    if let Some(body) = body {
266                        // If the loop has a body, process it using the implicit continuation that
267                        // continues to the loop itself
268                        follow(Some(continue0), Some(body));
269                    } else {
270                        // Assign the body of the loop to `continue`: this will become an error in a
271                        // later pass, because `loop { continue }` is unproductive
272                        self[node].expr = Ir::Loop(Some(continue0));
273                    }
274                }
275            }
276
277            // Reborrow here because the `Loop` clause loses the borrow on self from expr/next.
278            let CfgNode { expr, next, .. } = &mut self[node];
279            match next {
280                // If the next pointer exists, follow it and continue converting its syntax tree,
281                // with the implicit continuation of *our current scope* (i.e. if we're in a `Loop`,
282                // we're still in that `Loop` after following the continuation)
283                Some(next) => follow(implicit_cont, Some(*next)),
284                // If there is no explicit continuation for this node, *and* it is not a `Choose` or
285                // `Offer` (which have had their explicit continuations erased and lowered into the
286                // arms) then we need to assign the scoped implicit continuation, if there is one,
287                // as the new explicit continuation
288                None if !matches!(expr, Ir::Choose(_) | Ir::Offer(_)) => *next = implicit_cont,
289                // This will only be reached if there is no explicit continuation and the node is a
290                // `Choose` or `Offer`, in which case we want to do nothing.
291                _ => {}
292            }
293        }
294    }
295
296    /// Compute the set of nodes which are reachable from the root of the [`Cfg`], using the results
297    /// of the [`FlowAnalysis`] to determine whether nodes can be passed through.
298    fn analyze_reachability(&self, flow: &FlowAnalysis, root: Index) -> HashSet<Index> {
299        let mut stack = vec![root];
300        let mut reachable = HashSet::new();
301
302        while let Some(node_index) = stack.pop() {
303            if !reachable.insert(node_index) {
304                continue;
305            }
306
307            let node = &self[node_index];
308
309            match &node.expr {
310                Ir::Loop(child) | Ir::Call(child) => {
311                    stack.extend(child);
312
313                    if flow.is_passable(node_index) {
314                        stack.extend(node.next);
315                    }
316                }
317                Ir::Split { tx_only, rx_only } => {
318                    stack.extend(tx_only.iter().chain(rx_only));
319
320                    if flow.is_passable(node_index) {
321                        stack.extend(node.next);
322                    }
323                }
324                Ir::Choose(choices) | Ir::Offer(choices) => {
325                    stack.extend(choices.iter().filter_map(Option::as_ref));
326
327                    assert!(node.next.is_none(), "at this point in the compiler, all continuations of \
328                        `Choose` and `Offer` nodes should have been eliminated by the resolve_scopes pass.");
329                }
330                Ir::Send(_)
331                | Ir::Recv(_)
332                | Ir::Type(_)
333                | Ir::Break(_)
334                | Ir::Continue(_)
335                | Ir::Error => {
336                    if flow.is_passable(node_index) {
337                        stack.extend(node.next);
338                    }
339                }
340            }
341        }
342
343        reachable
344    }
345
346    /// Attach errors to all dead code and code that leads to dead code, based on an internal call
347    /// to the flow analysis and reachability analysis on the graph.
348    pub fn report_dead_code(&mut self, node: Option<Index>) {
349        let root = match node {
350            Some(node) => node,
351            None => return,
352        };
353
354        let flow = crate::flow::analyze(self);
355        let reachable = self.analyze_reachability(&flow, root);
356
357        let mut errors = Vec::new();
358        for (node_index, node) in self.iter().filter(|(i, _)| reachable.contains(i)) {
359            // The boundary of dead code is: when an *impassable* node has a continuation which
360            // itself is *unreachable*.
361            if let Some(cont_index) = node.next {
362                if !flow.is_passable(node_index)
363                    && !self[cont_index].allow_unreachable
364                    && !reachable.contains(&cont_index)
365                {
366                    // Emit an error for the node which causes the unreachability, and the node
367                    // which is made to be unreachable.
368                    errors.push((node_index, CompileError::FollowingCodeUnreachable));
369                    errors.push((cont_index, CompileError::UnreachableStatement));
370                }
371            }
372        }
373
374        // Insert all the discovered errors at once
375        for (index, error) in errors {
376            self.insert_error_at(index, error);
377        }
378    }
379
380    /// Traverse a [`Cfg`] rooted at the specified index to produce either a valid [`Target`]
381    /// corresponding to it, or a [`syn::Error`] corresponding to one or more compiler errors.
382    ///
383    /// **Important:** This function **must** be called **after** the scope resolution pass, or else
384    /// the resultant code may omit sections of the input control flow graph due to implicit
385    /// continuations which have not yet been resolved.
386    pub fn generate_target(&mut self, node: Option<Index>) -> Result<Spanned<Target>, Error> {
387        // The loop environment is a stack of loop head indices, paired with whether or not they are
388        // currently known to be "productive": i.e., whether they contain something other than
389        // another loop before they hit their corresponding continue.
390        struct LoopEnv {
391            stack: Vec<(Index, bool)>,
392        }
393
394        impl LoopEnv {
395            // Make a new loop environment.
396            fn new() -> Self {
397                Self { stack: Vec::new() }
398            }
399
400            // Push a new loop index (initially tagged un-productive) onto the stack.
401            fn push(&mut self, index: Index) {
402                self.stack.push((index, false));
403            }
404
405            // Mark the most inner loop as being productive, if one exists.
406            fn mark_productive(&mut self) {
407                if let Some(top) = self.stack.last_mut() {
408                    top.1 = true;
409                }
410            }
411
412            // Check if the loop corresponding to the specified de Bruijn index is currently known
413            // to be productive.
414            fn productive_for_continue(&self, n: usize) -> bool {
415                self.stack.iter().rev().take(n + 1).any(|&(_, p)| p)
416            }
417
418            // Compute the de Bruijn index of a loop given its CFG index.
419            fn debruijn_index_of(&self, jump_target: Index) -> Option<usize> {
420                self.stack
421                    .iter()
422                    .rev()
423                    .position(|&(loop_node, _)| loop_node == jump_target)
424            }
425
426            fn pop(&mut self) {
427                self.stack.pop();
428            }
429        }
430
431        fn generate_inner(
432            cfg: &Cfg,
433            errors: &mut Vec<(Index, CompileError)>,
434            loop_env: &mut LoopEnv,
435            maybe_node: Option<Index>,
436        ) -> Spanned<Target> {
437            let (node, node_index) = match maybe_node {
438                Some(node) => (&cfg[node], node),
439                None => {
440                    return Spanned {
441                        inner: Target::Done,
442                        span: Span::call_site(),
443                    }
444                }
445            };
446
447            // In all cases except for `loop`, `continue`, and `break` nodes, we mark the node's
448            // current loop environment as being productive.
449            match &node.expr {
450                Ir::Loop(_) | Ir::Continue(_) | Ir::Break(_) => {}
451                _ => loop_env.mark_productive(),
452            }
453
454            let target = match &node.expr {
455                Ir::Recv(t) => {
456                    let t = t.clone();
457                    let cont = generate_inner(cfg, errors, loop_env, node.next);
458                    Target::Recv(t, Rc::new(cont))
459                }
460                Ir::Send(t) => {
461                    let cont = generate_inner(cfg, errors, loop_env, node.next);
462                    Target::Send(t.clone(), Rc::new(cont))
463                }
464                Ir::Call(callee) => {
465                    let cont = generate_inner(cfg, errors, loop_env, node.next);
466                    let callee = generate_inner(cfg, errors, loop_env, *callee);
467
468                    Target::Call(Rc::new(callee), Rc::new(cont))
469                }
470                Ir::Split { tx_only, rx_only } => {
471                    let cont = generate_inner(cfg, errors, loop_env, node.next);
472                    let (tx_only, rx_only) = (*tx_only, *rx_only);
473                    let tx_target = generate_inner(cfg, errors, loop_env, tx_only);
474                    let rx_target = generate_inner(cfg, errors, loop_env, rx_only);
475
476                    Target::Split {
477                        tx_only: Rc::new(tx_target),
478                        rx_only: Rc::new(rx_target),
479                        cont: Rc::new(cont),
480                    }
481                }
482                Ir::Choose(choices) => {
483                    let targets = choices
484                        .iter()
485                        .map(|&choice| generate_inner(cfg, errors, loop_env, choice))
486                        .collect();
487                    debug_assert!(node.next.is_none(), "non-`Done` continuation for `Choose`");
488
489                    Target::Choose(targets)
490                }
491                Ir::Offer(choices) => {
492                    let targets = choices
493                        .iter()
494                        .map(|&choice| generate_inner(cfg, errors, loop_env, choice))
495                        .collect();
496                    debug_assert!(node.next.is_none(), "non-`Done` continuation for `Offer`");
497
498                    Target::Offer(targets)
499                }
500                Ir::Loop(body) => {
501                    loop_env.push(node_index);
502
503                    let target =
504                        Target::Loop(Rc::new(generate_inner(cfg, errors, loop_env, *body)));
505
506                    loop_env.pop();
507
508                    target
509                }
510                Ir::Continue(of_loop) => {
511                    let jump_target = *of_loop;
512                    let debruijn_index = loop_env
513                        .debruijn_index_of(jump_target)
514                        .expect("continues are always well-scoped in the CFG");
515                    // Check if we are within a productive loop, and generate errors otherwise
516                    if !loop_env.productive_for_continue(debruijn_index) {
517                        // Emit an error for the unproductive loop
518                        errors.push((jump_target, CompileError::UnproductiveLoop));
519
520                        if !node.allow_unreachable {
521                            // We don't emit errors for machine-generated `continue`s, because they
522                            // would be unilluminating to the user
523                            errors.push((node_index, CompileError::UnproductiveContinue));
524                        }
525                    }
526                    Target::Continue(debruijn_index)
527                }
528                Ir::Type(t) => {
529                    // Optimize a little bit by emitting the `Then` trait invocation only when
530                    // necessary (when the continuation is not `Done`)
531                    match node.next {
532                        None => Target::Type(t.clone()),
533                        Some(cont_index) => {
534                            let cont = generate_inner(cfg, errors, loop_env, Some(cont_index));
535                            Target::Then(
536                                Rc::new(Spanned {
537                                    inner: Target::Type(t.clone()),
538                                    span: node.span,
539                                }),
540                                Rc::new(cont),
541                            )
542                        }
543                    }
544                }
545                // Early return cases: we inline these recursive calls, but we don't want to change
546                // their spans because they are already correct.
547                Ir::Break(of_loop) => {
548                    return generate_inner(cfg, errors, loop_env, cfg[*of_loop].next);
549                }
550                Ir::Error => {
551                    return generate_inner(cfg, errors, loop_env, node.next);
552                }
553            };
554
555            Spanned {
556                inner: target,
557                span: node.span,
558            }
559        }
560
561        // Generate the target, collecting unproductivity errors if any are discovered
562        let mut errors = Vec::new();
563        let output = generate_inner(self, &mut errors, &mut LoopEnv::new(), node);
564
565        // Insert all the found errors at once
566        for (index, kind) in errors {
567            self.insert_error_at(index, kind);
568        }
569
570        // Collect all the errors attached to any node in the syntax tree
571        let all_errors = self.arena.iter().flat_map(|(_, node)| {
572            node.errors.iter().map(move |err| Spanned {
573                inner: err.clone(),
574                span: node.span,
575            })
576        });
577
578        // Merge all collected errors into one `syn::Error`, if any were discovered
579        let mut maybe_error = None;
580        for reported_error in all_errors {
581            let new_error = Error::new(reported_error.span, reported_error.inner.to_string());
582            match maybe_error.as_mut() {
583                None => maybe_error = Some(new_error),
584                Some(accumulated_errors) => accumulated_errors.combine(new_error),
585            }
586        }
587
588        match maybe_error {
589            Some(error) => Err(error),
590            None => Ok(output),
591        }
592    }
593
594    /// Iterate over all nodes in the CFG. The order is not defined.
595    pub fn iter(&self) -> impl Iterator<Item = (Index, &CfgNode)> + '_ {
596        self.arena.iter()
597    }
598}