dialectic_compiler/cfg.rs
1//! The control flow graph representation of a session type.
2
3use {
4 proc_macro2::Span,
5 std::{collections::HashSet, ops, rc::Rc},
6 syn::{Error, Type},
7 thunderdome::{Arena, Index},
8};
9
10use crate::{flow::FlowAnalysis, target::Target, CompileError, Spanned};
11
12/// A single entry for a node in the CFG.
13#[derive(Debug, Clone)]
14pub struct CfgNode {
15 /// The variant of this node.
16 pub expr: Ir,
17 /// The continuation of the node, if present. Absence of a continuation indicates the `Done` or
18 /// "halting" continuation.
19 pub next: Option<Index>,
20 /// The associated syntactic span of this node. If an error is emitted regarding this node, this
21 /// span may be used to correctly map it to a location in the surface syntax.
22 pub span: Span,
23 /// "Machine-generated" nodes are allowed to be unreachable. This is used when automatically
24 /// inserting continues when resolving scopes, so that if a continue is inserted which isn't
25 /// actually reachable, an error isn't emitted.
26 pub allow_unreachable: bool,
27 /// Nodes keep a hashset of errors in order to deduplicate any errors which are emitted multiple
28 /// times for the same node. Errors are traversed when code is emitted during
29 /// [`Cfg::generate_target`].
30 pub errors: HashSet<CompileError>,
31}
32
33/// The "kind" of a CFG node. CFG nodes have additional data stored in [`CfgNode`] which is always
34/// the same types and fields for every node, so we separate into the `Ir` variant and `CfgNode`
35/// wrapper/entry struct.
36#[derive(Debug, Clone)]
37pub enum Ir {
38 /// Indicating receiving a value of some type.
39 Recv(Type),
40 /// Indicating sending a value of some type.
41 Send(Type),
42 /// `Call` nodes act somewhat like a call/cc, and run their body continuation in the same scope
43 /// as they are called all the way to "Done" before running their own continuation.
44 Call(Option<Index>),
45 /// `Choose` nodes have a list of continuations which supersede their "next" pointer. Before
46 /// scope resolution, these continuations may be extended by their "implicit" subsequent
47 /// continuation, which is stored in the "next" pointer of the node. The scope resolution pass
48 /// "lowers" this next pointer continuation into the arms of the `Choose`, and so after scope
49 /// resolution all `Choose` nodes' next pointers should be `None`.
50 Choose(Vec<Option<Index>>),
51 /// Like `Choose`, `Offer` nodes have a list of choices, and after scope resolution have no
52 /// continuation.
53 Offer(Vec<Option<Index>>),
54 /// `Split` nodes have a transmit-only half and a receive-only half. The nodes' semantics are
55 /// similar to `Call`.
56 Split {
57 /// The transmit-only half.
58 tx_only: Option<Index>,
59 /// The receive-only half.
60 rx_only: Option<Index>,
61 },
62 /// Early on, loops *may* have a body; however, after scope resolution, all loops should have
63 /// their bodies be `Some`. So post scope resolution, this field may be unwrapped.
64 Loop(Option<Index>),
65 /// `Break` nodes directly reference the loop that they break. They can be considered as a
66 /// direct reference to the "next" pointer of the referenced loop node.
67 Break(Index),
68 /// Like break nodes, `Continue` nodes directly reference the loop that they continue.
69 /// Semantically they can be considered a reference to the body of the loop, but as they are a
70 /// special construct in the target language, we don't resolve them that way.
71 Continue(Index),
72 /// A "directly injected" type.
73 Type(Type),
74 /// Emitted when we need to have a node to put errors on but need to not reason about its
75 /// behavior.
76 Error,
77}
78
79impl CfgNode {
80 /// Shorthand for creating a [`CfgNode`] with a bunch of default values for its fields, which
81 /// you normally want, as well as a default span.
82 fn singleton(expr: Ir) -> Self {
83 Self {
84 expr,
85 next: None,
86 span: Span::call_site(),
87 allow_unreachable: false,
88 errors: HashSet::new(),
89 }
90 }
91
92 /// Shorthand for creating a [`CfgNode`] similarly to [`CfgNode::singleton`], but with an
93 /// associated span.
94 fn spanned(expr: Ir, span: Span) -> Self {
95 Self {
96 expr,
97 next: None,
98 span,
99 allow_unreachable: false,
100 errors: HashSet::new(),
101 }
102 }
103}
104
105/// A cfg of CFG nodes acting as a context for a single compilation unit.
106#[derive(Debug)]
107pub struct Cfg {
108 arena: Arena<CfgNode>,
109}
110
111impl ops::Index<Index> for Cfg {
112 type Output = CfgNode;
113
114 fn index(&self, index: Index) -> &Self::Output {
115 &self.arena[index]
116 }
117}
118
119impl ops::IndexMut<Index> for Cfg {
120 fn index_mut(&mut self, index: Index) -> &mut Self::Output {
121 &mut self.arena[index]
122 }
123}
124
125impl Default for Cfg {
126 fn default() -> Self {
127 Cfg::new()
128 }
129}
130
131impl Cfg {
132 /// Construct an empty control flow graph.
133 pub fn new() -> Self {
134 Self {
135 arena: Arena::new(),
136 }
137 }
138
139 /// Insert a constructed `CfgNode` into the CFG, and get back its newly minted index.
140 pub fn insert(&mut self, node: CfgNode) -> Index {
141 self.arena.insert(node)
142 }
143
144 /// Insert an `Error` node as a shim.
145 pub fn insert_error_at(&mut self, node: Index, kind: CompileError) {
146 self[node].errors.insert(kind);
147 }
148
149 /// Create a dummy `Error` node, as a stand-in for a node which should have been generated but
150 /// for some reason cannot be.
151 pub fn create_error(&mut self, kind: CompileError, span: Span) -> Index {
152 let mut errors = HashSet::new();
153 errors.insert(kind);
154 self.insert(CfgNode {
155 expr: Ir::Error,
156 next: None,
157 span,
158 errors,
159
160 // This node is actually usually generated during parsing for things like a `break` or
161 // `continue`. So, we actually do want to treat it as if the user wrote it themselves.
162 allow_unreachable: false,
163 })
164 }
165
166 /// Create a new node containing only this expression with no next-node link.
167 pub fn singleton(&mut self, expr: Ir) -> Index {
168 self.insert(CfgNode::singleton(expr))
169 }
170
171 /// Create a new node w/ an assigned span containing only this expression with no next-node
172 /// link.
173 pub fn spanned(&mut self, expr: Ir, span: Span) -> Index {
174 self.insert(CfgNode::spanned(expr, span))
175 }
176
177 /// The purpose of the scope resolution pass is to transform "implicit" continuations into
178 /// "explicit" continuations. For example, consider the session type `choose { ... }; send ()`.
179 /// In this type, the first parsing of the block will result in a `Choose` node with its
180 /// continuation/"next" pointer set to point to a `Send(())` node. However, in order to ensure
181 /// that the arms of the choose construct don't have to worry about the continuation that comes
182 /// "after" *in a higher/parent scope,* we have the resolve_scopes pass to "lower" this
183 /// continuation which implicitly follows every arm of the `Choose`, into becoming the
184 /// continuation of every relevant arm of the `Choose`. This does have some special cases, for
185 /// example we don't want to change or set a next continuation for a `Break` node or `Continue`
186 /// node.
187 pub fn resolve_scopes(&mut self, node: Option<Index>) {
188 // Depth-first once-only traversal of CFG, skipping nodes we've already seen
189 let mut visited = HashSet::new();
190
191 // The stack tracks pairs of implicit continuations (the absence of which indicates the
192 // `Done` continuation), and indices which might have that continuation or have children
193 // which might have that continuation. We push newly discovered nodes onto this stack, and
194 // pop from it to drive the traversal. We initialize the stack with the root node, if it is
195 // present.
196 let mut stack = vec![];
197 stack.extend(node.map(|root| (None, root)));
198
199 while let Some((implicit_cont, node)) = stack.pop() {
200 // "Follow" a node by checking to see if we have visited it, and if not, pushing it on
201 // to the stack to be visited later along with the supplied scoped implicit
202 // continuation.
203 let mut follow = |implicit_cont, node| {
204 if let Some(node) = node {
205 if visited.insert(node) {
206 stack.push((implicit_cont, node));
207 }
208 }
209 };
210
211 // This match is followed by a statement which checks for a continuation and adds it to
212 // the queue if it finds one. As such, any branch which needs specifically to not check
213 // its continuation and recursively resolve scopes inside it, will be ended with a
214 // `continue`, to skip this recursion.
215 //
216 // Specifically this is *very* important for nodes such as `Offer` and `Choose`, which
217 // will no longer have a continuation at all after this pass, as their continuations
218 // will be "lifted" into each arm of the node.
219 let CfgNode { expr, next, .. } = &mut self[node];
220 match expr {
221 Ir::Recv(_)
222 | Ir::Send(_)
223 | Ir::Type(_)
224 | Ir::Break(_)
225 | Ir::Continue(_)
226 | Ir::Error => {}
227
228 Ir::Call(child) => {
229 // `call` expressions evaluate until done; there is no implicit continuation
230 // this is why we pass the `None` implicit continuation to `follow`
231 follow(None, *child);
232 }
233 Ir::Split { tx_only, rx_only } => {
234 // `split` expressions evaluate until done; there is no implicit continuation
235 // this is why we pass the `None` implicit continuation to `follow`
236 follow(None, *tx_only);
237 follow(None, *rx_only);
238 }
239 Ir::Choose(choices) | Ir::Offer(choices) => {
240 // Inline the current implicit continuation into the next pointer of the node,
241 // if it is `Some`
242 follow(implicit_cont, *next);
243 // *Take* the next pointer out so that it is now None, as we are eliminating the
244 // continuation of this node
245 let new_implicit_cont = next.take().or(implicit_cont);
246
247 // Follow each arm of the `choose`/`offer` using the new implicit continuation
248 for choice in choices.iter_mut() {
249 // If the choice is `Done`, we need to reassign it to the new implicit
250 // continuation. Otherwise, we follow it.
251 if choice.is_some() {
252 follow(new_implicit_cont, *choice);
253 } else {
254 *choice = new_implicit_cont;
255 }
256 }
257 }
258 Ir::Loop(body) => {
259 let body = *body;
260 let continue0 = self.singleton(Ir::Continue(node));
261 // We generated this without knowing for sure whether it is reachable or not, so
262 // mark it machine-generated so that it doesn't trigger an unreachable code
263 // error.
264 self[continue0].allow_unreachable = true;
265 if let Some(body) = body {
266 // If the loop has a body, process it using the implicit continuation that
267 // continues to the loop itself
268 follow(Some(continue0), Some(body));
269 } else {
270 // Assign the body of the loop to `continue`: this will become an error in a
271 // later pass, because `loop { continue }` is unproductive
272 self[node].expr = Ir::Loop(Some(continue0));
273 }
274 }
275 }
276
277 // Reborrow here because the `Loop` clause loses the borrow on self from expr/next.
278 let CfgNode { expr, next, .. } = &mut self[node];
279 match next {
280 // If the next pointer exists, follow it and continue converting its syntax tree,
281 // with the implicit continuation of *our current scope* (i.e. if we're in a `Loop`,
282 // we're still in that `Loop` after following the continuation)
283 Some(next) => follow(implicit_cont, Some(*next)),
284 // If there is no explicit continuation for this node, *and* it is not a `Choose` or
285 // `Offer` (which have had their explicit continuations erased and lowered into the
286 // arms) then we need to assign the scoped implicit continuation, if there is one,
287 // as the new explicit continuation
288 None if !matches!(expr, Ir::Choose(_) | Ir::Offer(_)) => *next = implicit_cont,
289 // This will only be reached if there is no explicit continuation and the node is a
290 // `Choose` or `Offer`, in which case we want to do nothing.
291 _ => {}
292 }
293 }
294 }
295
296 /// Compute the set of nodes which are reachable from the root of the [`Cfg`], using the results
297 /// of the [`FlowAnalysis`] to determine whether nodes can be passed through.
298 fn analyze_reachability(&self, flow: &FlowAnalysis, root: Index) -> HashSet<Index> {
299 let mut stack = vec![root];
300 let mut reachable = HashSet::new();
301
302 while let Some(node_index) = stack.pop() {
303 if !reachable.insert(node_index) {
304 continue;
305 }
306
307 let node = &self[node_index];
308
309 match &node.expr {
310 Ir::Loop(child) | Ir::Call(child) => {
311 stack.extend(child);
312
313 if flow.is_passable(node_index) {
314 stack.extend(node.next);
315 }
316 }
317 Ir::Split { tx_only, rx_only } => {
318 stack.extend(tx_only.iter().chain(rx_only));
319
320 if flow.is_passable(node_index) {
321 stack.extend(node.next);
322 }
323 }
324 Ir::Choose(choices) | Ir::Offer(choices) => {
325 stack.extend(choices.iter().filter_map(Option::as_ref));
326
327 assert!(node.next.is_none(), "at this point in the compiler, all continuations of \
328 `Choose` and `Offer` nodes should have been eliminated by the resolve_scopes pass.");
329 }
330 Ir::Send(_)
331 | Ir::Recv(_)
332 | Ir::Type(_)
333 | Ir::Break(_)
334 | Ir::Continue(_)
335 | Ir::Error => {
336 if flow.is_passable(node_index) {
337 stack.extend(node.next);
338 }
339 }
340 }
341 }
342
343 reachable
344 }
345
346 /// Attach errors to all dead code and code that leads to dead code, based on an internal call
347 /// to the flow analysis and reachability analysis on the graph.
348 pub fn report_dead_code(&mut self, node: Option<Index>) {
349 let root = match node {
350 Some(node) => node,
351 None => return,
352 };
353
354 let flow = crate::flow::analyze(self);
355 let reachable = self.analyze_reachability(&flow, root);
356
357 let mut errors = Vec::new();
358 for (node_index, node) in self.iter().filter(|(i, _)| reachable.contains(i)) {
359 // The boundary of dead code is: when an *impassable* node has a continuation which
360 // itself is *unreachable*.
361 if let Some(cont_index) = node.next {
362 if !flow.is_passable(node_index)
363 && !self[cont_index].allow_unreachable
364 && !reachable.contains(&cont_index)
365 {
366 // Emit an error for the node which causes the unreachability, and the node
367 // which is made to be unreachable.
368 errors.push((node_index, CompileError::FollowingCodeUnreachable));
369 errors.push((cont_index, CompileError::UnreachableStatement));
370 }
371 }
372 }
373
374 // Insert all the discovered errors at once
375 for (index, error) in errors {
376 self.insert_error_at(index, error);
377 }
378 }
379
380 /// Traverse a [`Cfg`] rooted at the specified index to produce either a valid [`Target`]
381 /// corresponding to it, or a [`syn::Error`] corresponding to one or more compiler errors.
382 ///
383 /// **Important:** This function **must** be called **after** the scope resolution pass, or else
384 /// the resultant code may omit sections of the input control flow graph due to implicit
385 /// continuations which have not yet been resolved.
386 pub fn generate_target(&mut self, node: Option<Index>) -> Result<Spanned<Target>, Error> {
387 // The loop environment is a stack of loop head indices, paired with whether or not they are
388 // currently known to be "productive": i.e., whether they contain something other than
389 // another loop before they hit their corresponding continue.
390 struct LoopEnv {
391 stack: Vec<(Index, bool)>,
392 }
393
394 impl LoopEnv {
395 // Make a new loop environment.
396 fn new() -> Self {
397 Self { stack: Vec::new() }
398 }
399
400 // Push a new loop index (initially tagged un-productive) onto the stack.
401 fn push(&mut self, index: Index) {
402 self.stack.push((index, false));
403 }
404
405 // Mark the most inner loop as being productive, if one exists.
406 fn mark_productive(&mut self) {
407 if let Some(top) = self.stack.last_mut() {
408 top.1 = true;
409 }
410 }
411
412 // Check if the loop corresponding to the specified de Bruijn index is currently known
413 // to be productive.
414 fn productive_for_continue(&self, n: usize) -> bool {
415 self.stack.iter().rev().take(n + 1).any(|&(_, p)| p)
416 }
417
418 // Compute the de Bruijn index of a loop given its CFG index.
419 fn debruijn_index_of(&self, jump_target: Index) -> Option<usize> {
420 self.stack
421 .iter()
422 .rev()
423 .position(|&(loop_node, _)| loop_node == jump_target)
424 }
425
426 fn pop(&mut self) {
427 self.stack.pop();
428 }
429 }
430
431 fn generate_inner(
432 cfg: &Cfg,
433 errors: &mut Vec<(Index, CompileError)>,
434 loop_env: &mut LoopEnv,
435 maybe_node: Option<Index>,
436 ) -> Spanned<Target> {
437 let (node, node_index) = match maybe_node {
438 Some(node) => (&cfg[node], node),
439 None => {
440 return Spanned {
441 inner: Target::Done,
442 span: Span::call_site(),
443 }
444 }
445 };
446
447 // In all cases except for `loop`, `continue`, and `break` nodes, we mark the node's
448 // current loop environment as being productive.
449 match &node.expr {
450 Ir::Loop(_) | Ir::Continue(_) | Ir::Break(_) => {}
451 _ => loop_env.mark_productive(),
452 }
453
454 let target = match &node.expr {
455 Ir::Recv(t) => {
456 let t = t.clone();
457 let cont = generate_inner(cfg, errors, loop_env, node.next);
458 Target::Recv(t, Rc::new(cont))
459 }
460 Ir::Send(t) => {
461 let cont = generate_inner(cfg, errors, loop_env, node.next);
462 Target::Send(t.clone(), Rc::new(cont))
463 }
464 Ir::Call(callee) => {
465 let cont = generate_inner(cfg, errors, loop_env, node.next);
466 let callee = generate_inner(cfg, errors, loop_env, *callee);
467
468 Target::Call(Rc::new(callee), Rc::new(cont))
469 }
470 Ir::Split { tx_only, rx_only } => {
471 let cont = generate_inner(cfg, errors, loop_env, node.next);
472 let (tx_only, rx_only) = (*tx_only, *rx_only);
473 let tx_target = generate_inner(cfg, errors, loop_env, tx_only);
474 let rx_target = generate_inner(cfg, errors, loop_env, rx_only);
475
476 Target::Split {
477 tx_only: Rc::new(tx_target),
478 rx_only: Rc::new(rx_target),
479 cont: Rc::new(cont),
480 }
481 }
482 Ir::Choose(choices) => {
483 let targets = choices
484 .iter()
485 .map(|&choice| generate_inner(cfg, errors, loop_env, choice))
486 .collect();
487 debug_assert!(node.next.is_none(), "non-`Done` continuation for `Choose`");
488
489 Target::Choose(targets)
490 }
491 Ir::Offer(choices) => {
492 let targets = choices
493 .iter()
494 .map(|&choice| generate_inner(cfg, errors, loop_env, choice))
495 .collect();
496 debug_assert!(node.next.is_none(), "non-`Done` continuation for `Offer`");
497
498 Target::Offer(targets)
499 }
500 Ir::Loop(body) => {
501 loop_env.push(node_index);
502
503 let target =
504 Target::Loop(Rc::new(generate_inner(cfg, errors, loop_env, *body)));
505
506 loop_env.pop();
507
508 target
509 }
510 Ir::Continue(of_loop) => {
511 let jump_target = *of_loop;
512 let debruijn_index = loop_env
513 .debruijn_index_of(jump_target)
514 .expect("continues are always well-scoped in the CFG");
515 // Check if we are within a productive loop, and generate errors otherwise
516 if !loop_env.productive_for_continue(debruijn_index) {
517 // Emit an error for the unproductive loop
518 errors.push((jump_target, CompileError::UnproductiveLoop));
519
520 if !node.allow_unreachable {
521 // We don't emit errors for machine-generated `continue`s, because they
522 // would be unilluminating to the user
523 errors.push((node_index, CompileError::UnproductiveContinue));
524 }
525 }
526 Target::Continue(debruijn_index)
527 }
528 Ir::Type(t) => {
529 // Optimize a little bit by emitting the `Then` trait invocation only when
530 // necessary (when the continuation is not `Done`)
531 match node.next {
532 None => Target::Type(t.clone()),
533 Some(cont_index) => {
534 let cont = generate_inner(cfg, errors, loop_env, Some(cont_index));
535 Target::Then(
536 Rc::new(Spanned {
537 inner: Target::Type(t.clone()),
538 span: node.span,
539 }),
540 Rc::new(cont),
541 )
542 }
543 }
544 }
545 // Early return cases: we inline these recursive calls, but we don't want to change
546 // their spans because they are already correct.
547 Ir::Break(of_loop) => {
548 return generate_inner(cfg, errors, loop_env, cfg[*of_loop].next);
549 }
550 Ir::Error => {
551 return generate_inner(cfg, errors, loop_env, node.next);
552 }
553 };
554
555 Spanned {
556 inner: target,
557 span: node.span,
558 }
559 }
560
561 // Generate the target, collecting unproductivity errors if any are discovered
562 let mut errors = Vec::new();
563 let output = generate_inner(self, &mut errors, &mut LoopEnv::new(), node);
564
565 // Insert all the found errors at once
566 for (index, kind) in errors {
567 self.insert_error_at(index, kind);
568 }
569
570 // Collect all the errors attached to any node in the syntax tree
571 let all_errors = self.arena.iter().flat_map(|(_, node)| {
572 node.errors.iter().map(move |err| Spanned {
573 inner: err.clone(),
574 span: node.span,
575 })
576 });
577
578 // Merge all collected errors into one `syn::Error`, if any were discovered
579 let mut maybe_error = None;
580 for reported_error in all_errors {
581 let new_error = Error::new(reported_error.span, reported_error.inner.to_string());
582 match maybe_error.as_mut() {
583 None => maybe_error = Some(new_error),
584 Some(accumulated_errors) => accumulated_errors.combine(new_error),
585 }
586 }
587
588 match maybe_error {
589 Some(error) => Err(error),
590 None => Ok(output),
591 }
592 }
593
594 /// Iterate over all nodes in the CFG. The order is not defined.
595 pub fn iter(&self) -> impl Iterator<Item = (Index, &CfgNode)> + '_ {
596 self.arena.iter()
597 }
598}