biome_control_flow/lib.rs
1#![deny(rustdoc::broken_intra_doc_links)]
2
3use std::fmt::{self, Display, Formatter};
4
5use biome_rowan::{Language, SyntaxElement, SyntaxNode};
6use rustc_hash::FxHashMap;
7
8pub mod builder;
9
10use crate::builder::BlockId;
11
12/// The [ControlFlowGraph] is an auxiliary data structure to the syntax tree,
13/// representing the execution order of statements and expressions in a given
14/// function as a graph of [BasicBlock]
15#[derive(Debug, Clone)]
16pub struct ControlFlowGraph<L: Language> {
17 /// List of blocks that make up this function
18 pub blocks: Vec<BasicBlock<L>>,
19 /// The function node this CFG was built for in the syntax tree
20 pub node: SyntaxNode<L>,
21}
22
23impl<L: Language> ControlFlowGraph<L> {
24 fn new(node: SyntaxNode<L>) -> Self {
25 ControlFlowGraph {
26 blocks: vec![BasicBlock::new(None, None)],
27 node,
28 }
29 }
30
31 /// Returns the block identified by `id`.
32 pub fn get(&self, id: BlockId) -> &BasicBlock<L> {
33 // SAFETY: safe conversion because a block id corresponds to its index in `self.blocks`.
34 let block_index = id.index() as usize;
35 &self.blocks[block_index]
36 }
37
38 /// Returns pairs that contain a block identifier and the associated block
39 pub fn block_id_iter(&self) -> impl Iterator<Item = (BlockId, &BasicBlock<L>)> {
40 // SAFETY: safe conversion because a block id corresponds to its index in `self.blocks`.
41 self.blocks.iter().enumerate().map(|(index, block)| {
42 (
43 BlockId {
44 index: index as u32,
45 },
46 block,
47 )
48 })
49 }
50}
51
52/// A basic block represents an atomic unit of control flow, a flat list of
53/// instructions that will be executed linearly when a function is run.
54///
55/// Note, however, that while the instructions that comprise a basic block are
56/// guaranteed to be executed in order from the start towards the end, the
57/// block may not be executed entirely if a jump or return instruction is
58/// encountered.
59#[derive(Debug, Clone)]
60pub struct BasicBlock<L: Language> {
61 pub instructions: Vec<Instruction<L>>,
62 /// List of handlers to execute when an exception is thrown from this block
63 pub exception_handlers: Vec<ExceptionHandler>,
64 /// List of handlers to execute when the function returns from this block
65 pub cleanup_handlers: Vec<ExceptionHandler>,
66}
67
68impl<L: Language> BasicBlock<L> {
69 fn new(
70 exception_handlers: impl IntoIterator<Item = ExceptionHandler>,
71 cleanup_handlers: impl IntoIterator<Item = ExceptionHandler>,
72 ) -> Self {
73 Self {
74 instructions: Vec::new(),
75 exception_handlers: exception_handlers.into_iter().collect(),
76 cleanup_handlers: cleanup_handlers.into_iter().collect(),
77 }
78 }
79}
80
81/// Instructions are used to represent statements or expressions being executed
82/// as side effects, as well as potential divergence points for the control flow.
83///
84/// Each node has an associated kind, as well as an optional syntax node: the
85/// node is useful to emit diagnostics but does not have a semantic value, and
86/// is optional. The code generating the control flow graph may emit
87/// instructions that do not correspond to any node in the syntax tree, to model
88/// the control flow of the program accurately.
89#[derive(Debug, Clone)]
90pub struct Instruction<L: Language> {
91 pub kind: InstructionKind,
92 pub node: Option<SyntaxElement<L>>,
93}
94
95/// The different types of supported [Instruction]
96#[derive(Copy, Clone, Debug)]
97pub enum InstructionKind {
98 /// Indicates the [SyntaxNode](biome_rowan::SyntaxNode) associated with this
99 /// instruction is to be evaluated at this point in the program
100 Statement,
101 /// This instruction may cause the control flow to diverge towards `block`,
102 /// either unconditionally if `conditional` is set to `false`, or after
103 /// evaluating the associated syntax node otherwise
104 Jump {
105 conditional: bool,
106 block: BlockId,
107 /// Set to `true` for the terminating jump instruction out of a
108 /// `finally` clause, the target block can be reinterpreted to the next
109 /// exception handler instead if the control flow is currently unwinding
110 finally_fallthrough: bool,
111 },
112 /// This instruction causes the control flow to unconditionally abort the
113 /// execution of the function, for example is JavaScript this can be
114 /// triggered by a `return` or `throw` statement
115 Return,
116}
117
118#[derive(Debug, Clone, Copy)]
119pub struct ExceptionHandler {
120 pub kind: ExceptionHandlerKind,
121 pub target: BlockId,
122}
123
124#[derive(Debug, Clone, Copy)]
125pub enum ExceptionHandlerKind {
126 Catch,
127 Finally,
128}
129
130/// The Display implementation for [ControlFlowGraph] prints a flowchart in
131/// mermaid.js syntax
132///
133/// By default the graph is printed in "simple" mode where each basic block is
134/// represented as a node in the graph:
135///
136/// ```mermaid
137/// flowchart TB
138/// block_0["<b>block_0</b><br/>Statement(JS_VARIABLE_DECLARATION 38..47)<br/>Jump { block: 1 }"]
139/// block_1["<b>block_1</b><br/>Jump { condition: JS_BINARY_EXPRESSION 49..58, block: 2 }<br/>Jump { block: 3 }"]
140/// block_2["<b>block_2</b><br/>Statement(JS_POST_UPDATE_EXPRESSION 60..63)"]
141/// block_3["<b>block_3</b><br/>Statement(JS_EXPRESSION_STATEMENT 260..277)"]
142///
143/// block_0 --> block_1
144/// block_1 -- "JS_BINARY_EXPRESSION 49..58" --> block_2
145/// block_1 --> block_3
146/// ```
147///
148/// However the graph can also be printed in "detailed" mode by formatting it
149/// in alternate mode using `{:#}`, this will print each basic block as a
150/// subgraph instead:
151///
152/// ```mermaid
153/// flowchart TB
154/// subgraph block_0
155/// direction TB
156/// block_0_inst_0["Statement(JS_VARIABLE_DECLARATION 38..47)"]
157/// block_0_inst_0 --> block_0_inst_1["Jump { block: 1 }"]
158/// end
159/// subgraph block_1
160/// direction TB
161/// block_1_inst_0["Jump { condition: JS_BINARY_EXPRESSION 49..58, block: 2 }"]
162/// block_1_inst_0 --> block_1_inst_1["Jump { block: 3 }"]
163/// end
164/// subgraph block_2
165/// direction TB
166/// block_2_inst_0["Statement(JS_POST_UPDATE_EXPRESSION 60..63)"]
167/// end
168/// subgraph block_3
169/// direction TB
170/// block_3_inst_0["Statement(JS_EXPRESSION_STATEMENT 260..277)"]
171/// end
172///
173/// block_0_inst_1 --> block_1
174/// block_1_inst_0 -- "JS_BINARY_EXPRESSION 49..58" --> block_2
175/// block_1_inst_1 --> block_3
176/// ```
177impl<L: Language> Display for ControlFlowGraph<L> {
178 fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
179 writeln!(fmt, "flowchart TB")?;
180
181 let mut links = FxHashMap::default();
182 for (id, block) in self.blocks.iter().enumerate() {
183 if fmt.alternate() {
184 writeln!(fmt, " subgraph block_{id}")?;
185 writeln!(fmt, " direction TB")?;
186 } else {
187 write!(fmt, " block_{id}[\"<b>block_{id}</b><br/>")?;
188 }
189
190 for (index, inst) in block.instructions.iter().enumerate() {
191 if fmt.alternate() {
192 write!(fmt, " ")?;
193
194 if let Some(index) = index.checked_sub(1) {
195 write!(fmt, "block_{id}_inst_{index} --> ")?;
196 }
197
198 writeln!(fmt, "block_{id}_inst_{index}[\"{inst}\"]")?;
199 } else {
200 if index > 0 {
201 write!(fmt, "<br/>")?;
202 }
203
204 write!(fmt, "{inst}")?;
205 }
206
207 if let InstructionKind::Jump {
208 conditional, block, ..
209 } = inst.kind
210 {
211 let condition = inst
212 .node
213 .as_ref()
214 .filter(|_| conditional)
215 .map(|node| (node.kind(), node.text_trimmed_range()));
216 links.insert((id, index, block.index()), condition);
217 }
218 }
219
220 if fmt.alternate() {
221 writeln!(fmt, " end")?;
222 } else {
223 writeln!(fmt, "\"]")?;
224 }
225 }
226
227 writeln!(fmt)?;
228
229 for ((id, index, to), condition) in links {
230 write!(fmt, " block_{id}")?;
231
232 if fmt.alternate() {
233 write!(fmt, "_inst_{index}")?;
234 }
235
236 if let Some((cond, range)) = condition {
237 write!(fmt, " -- \"{cond:?} {range:?}\"")?;
238 }
239
240 writeln!(fmt, " --> block_{to}")?;
241 }
242
243 Ok(())
244 }
245}
246
247impl<L: Language> Display for Instruction<L> {
248 fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
249 match self.kind {
250 InstructionKind::Statement => {
251 if let Some(node) = &self.node {
252 write!(
253 fmt,
254 "Statement({:?} {:?})",
255 node.kind(),
256 node.text_trimmed_range()
257 )
258 } else {
259 write!(fmt, "Statement")
260 }
261 }
262
263 InstructionKind::Jump {
264 conditional: true,
265 block,
266 ..
267 } if self.node.is_some() => {
268 // SAFETY: Checked by the above call to `is_some`
269 let node = self.node.as_ref().unwrap();
270 write!(
271 fmt,
272 "Jump {{ condition: {:?} {:?}, block: {} }}",
273 node.kind(),
274 node.text_trimmed_range(),
275 block.index()
276 )
277 }
278
279 InstructionKind::Jump { block, .. } => {
280 write!(fmt, "Jump {{ block: {} }}", block.index())
281 }
282
283 InstructionKind::Return => {
284 if let Some(node) = &self.node {
285 write!(
286 fmt,
287 "Return({:?} {:?})",
288 node.kind(),
289 node.text_trimmed_range()
290 )
291 } else {
292 write!(fmt, "Return")
293 }
294 }
295 }
296 }
297}