1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
use crate::{
    asm_generation::{
        register_sequencer::RegisterSequencer, RegisterAllocationStatus, RegisterPool,
    },
    asm_lang::{virtual_register::*, Op, VirtualOp},
};

use std::collections::{BTreeSet, HashMap};

use either::Either;
use petgraph::graph::NodeIndex;

pub type InterferenceGraph =
    petgraph::stable_graph::StableGraph<VirtualRegister, (), petgraph::Undirected>;

/// Given a list of instructions `ops` of a program, do liveness analysis for the full program.
///
/// A virtual registers is live at some point in the program if it has previously been defined by
/// an instruction and will be used by an instruction in the future.
///
/// The analysis function below assumes that it is possible that a virtual register is assigned
/// more than once. That is, it doesn't assume that the intermediate assembly is in SSA form.
///
/// Two tables are generated: `live_in` and `live_out`. Each row in the tables corresponds to an
/// instruction in the program.
/// * A virtual register is in the `live_out` table for a given instruction if it is live on any
/// of that node's out-edges
/// * A virtual register is in the `live_in` table for a given instruction if it is live on any
/// of that node's in-edges
///
///
/// Algorithm:
/// ===============================================================================================
/// for each instruction op:
///     live_in(op) = {}
///     live_out(op) = {}
///     def(op) = list of virtual registers defined by op
///     use(op) = list of virtual registers used by op
///
/// repeat
///     for each instruction op (traversed in reverse topological order of the CFG)
///         prev_live_in(op) = live_in(op)
///         prev_live_out(op) = live_out(op)
///         live_out(op) = live_in(s_1) UNION live_in(s_2) UNION live_in(s_3) UNION ...
///                        where s_1, s_2, s_3, ... are all the successors of op in the CFG.
///         live_in(op) = use(op) UNION (live_out(op) - def(op))
/// until     prev_live_in(op) = live_in(op)
///       AND prev_live_out(op) = live_out(op)
/// ===============================================================================================
///
/// Note that we're only looking at registers that have the enum variant
/// VirtualRegister::Virtual(_). All other registers (i.e. ones with the
/// VirtualRegister::Constant(_) variant) are assumed to be live throughout the full program.
///
/// This function finally returns `live_out` because it has all the liveness information needed.
/// `live_in` is computed because it is needed to compute `live_out` iteratively.
///
pub(crate) fn liveness_analysis(ops: &[Op]) -> HashMap<usize, BTreeSet<VirtualRegister>> {
    // Hash maps that will reprsent the live_in and live_out tables. The key of each hash map is
    // simply the index of each instruction in the `ops` vector.
    let mut live_in: HashMap<usize, BTreeSet<VirtualRegister>> =
        HashMap::from_iter((0..ops.len()).into_iter().map(|idx| (idx, BTreeSet::new())));
    let mut live_out: HashMap<usize, BTreeSet<VirtualRegister>> =
        HashMap::from_iter((0..ops.len()).into_iter().map(|idx| (idx, BTreeSet::new())));

    let mut modified = true;
    while modified {
        modified = false;
        // Iterate in reverse topological order of the CFG (which is basically the same as the
        // reverse order of `ops`. This makes the outer `while` loop converge faster.
        for (ix, op) in ops.iter().rev().enumerate() {
            let rev_ix = ops.len() - ix - 1;

            // Get use and def vectors without any of the Constant registers
            let mut op_use = op.use_registers();
            let mut op_def = op.def_registers();
            op_use.retain(|&reg| matches!(reg, VirtualRegister::Virtual(_)));
            op_def.retain(|&reg| matches!(reg, VirtualRegister::Virtual(_)));

            let prev_live_out_op = live_out.get(&rev_ix).expect("ix must exist").clone();
            let prev_live_in_op = live_in.get(&rev_ix).expect("ix must exist").clone();

            // Compute live_out(op) = live_in(s_1) UNION live_in(s_2) UNION ..., where s1, s_2, ...
            // are successors of op
            let live_out_op = live_out.get_mut(&rev_ix).expect("ix must exist");
            for s in &op.successors(rev_ix, ops) {
                for l in live_in.get(s).expect("ix must exist") {
                    live_out_op.insert(l.clone());
                }
            }

            // Compute live_in(op) = use(op) UNION (live_out(op) - def(op))
            // Add use(op)
            let live_in_op = live_in.get_mut(&rev_ix).expect("ix must exist");
            for u in op_use {
                live_in_op.insert(u.clone());
            }

            // Add live_out(op) - def(op)
            let mut live_out_op_minus_defs = live_out_op.clone();
            for d in &op_def {
                live_out_op_minus_defs.remove(d);
            }
            for l in &live_out_op_minus_defs {
                live_in_op.insert(l.clone());
            }

            // Did anything change in this iteration?
            modified |= (prev_live_in_op != *live_in_op) || (prev_live_out_op != *live_out_op);
        }
    }

    live_out
}

/// Given a list of instructions `ops` and a `live_out` table computed using the method
/// `liveness_analysis()`, create an interference graph (aka a "conflict" graph):
/// * Nodes = virtual registers
/// * Edges = overlapping live ranges
///
/// Two virtual registers interfere if there exists a point in the program where both are
/// simultaneously live. If `v1` and `v2` interfere, they cannot be allocated to the same register.
///
/// Algorithm:
/// ===============================================================================================
/// 1. create a graph node for every virtual register used.
/// 2. for a MOVE "v <= c" with live_out virtual registers b1, ... bn for v:
///        add edges (v, b_1), ..., (v, b_n) for any b_i different from c.
/// 3. for non-MOVE def of virtual register v with live_out virtual registers b_1, ..., b_n:
///        add edges (v, b_1), ..., (v, b_n)
/// ===============================================================================================
///
pub(crate) fn create_interference_graph(
    ops: &[Op],
    live_out: &HashMap<usize, BTreeSet<VirtualRegister>>,
) -> (InterferenceGraph, HashMap<VirtualRegister, NodeIndex>) {
    let mut interference_graph = InterferenceGraph::with_capacity(0, 0);

    // Figure out a mapping between a given VirtualRegister and its corresponding NodeIndex
    // in the interference graph.
    let mut reg_to_node_map: HashMap<VirtualRegister, NodeIndex> = HashMap::new();

    // Get all virtual registers used by the intermediate assembly and add them to the graph
    ops.iter()
        .fold(BTreeSet::new(), |mut tree, elem| {
            let mut regs = elem.registers();
            regs.retain(|&reg| matches!(reg, VirtualRegister::Virtual(_)));
            tree.extend(regs.into_iter());
            tree
        })
        .iter()
        .for_each(|&reg| {
            reg_to_node_map.insert(reg.clone(), interference_graph.add_node(reg.clone()));
        });

    for (ix, regs) in live_out {
        match &ops[*ix].opcode {
            Either::Left(VirtualOp::MOVE(v, c)) => {
                if let Some(ix1) = reg_to_node_map.get(v) {
                    for b in regs.iter() {
                        if let Some(ix2) = reg_to_node_map.get(b) {
                            // Add edge (v, b) if b != c
                            // Also, avoid adding self edges and edges that already exist
                            if *b != *c && *b != *v && !interference_graph.contains_edge(*ix1, *ix2)
                            {
                                interference_graph.add_edge(*ix1, *ix2, ());
                            }
                        }
                    }
                }
            }
            _ => {
                for v in &ops[*ix].def_registers() {
                    if let Some(ix1) = reg_to_node_map.get(v) {
                        for b in regs.iter() {
                            if let Some(ix2) = reg_to_node_map.get(b) {
                                // Add edge (v, b)
                                // Avoid adding self edges and edges that already exist
                                if *b != **v && !interference_graph.contains_edge(*ix1, *ix2) {
                                    interference_graph.add_edge(*ix1, *ix2, ());
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    (interference_graph, reg_to_node_map)
}

/// Given a list of instructions `ops` and a corresponding interference_graph, generate a new,
/// smaller list of instructions, where unnecessary MOVE instructions have been removed. When an
/// unnecessary MOVE is detected and removed, the two virtual registers used by the MOVE are said
/// to be "coalesced" and the two corresponding nodes in the graph are then merged.
///
/// Two important aspects of this for our implementation:
/// * When two registers are coalesced, a new node with a new virtual register (generated using the
///   register sequencer) is created in the interference graph.
/// * When a MOVE instruction is removed, the offset of each subsequent instruction has to be
/// updated, as well as the immediate values for some or all jump instructions (`ji`, `jnei`, and
/// `jnzi for now).
///
pub(crate) fn coalesce_registers(
    ops: &[Op],
    interference_graph: &mut InterferenceGraph,
    reg_to_node_map: &mut HashMap<VirtualRegister, NodeIndex>,
    register_sequencer: &mut RegisterSequencer,
) -> Vec<Op> {
    // A map from the virtual registers that are removed to the virtual registers that they are
    // replaced with during the coalescing process.
    let mut reg_to_reg_map: HashMap<VirtualRegister, VirtualRegister> = HashMap::new();

    // To hold the final *reduced* list of ops
    let mut reduced_ops: Vec<Op> = vec![];

    for op in ops {
        match &op.opcode {
            Either::Left(VirtualOp::MOVE(x, y)) => {
                match (x, y) {
                    (VirtualRegister::Virtual(_), VirtualRegister::Virtual(_)) => {
                        // Use reg_to_reg_map to figure out what x and y have been replaced
                        // with. We keep looking for mappings within reg_to_reg_map until we find a
                        // register that doesn't map to any other.
                        let regs = vec![x.clone(), y.clone()]
                            .iter()
                            .map(|reg| {
                                let mut temp = reg.clone();
                                while let Some(t) = reg_to_reg_map.get(&temp) {
                                    temp = t.clone();
                                }
                                temp
                            })
                            .collect::<Vec<_>>();
                        let (r1, r2) = (&regs[0], &regs[1]);

                        // Find the interference graph nodes that corresponding to r1 and r2
                        let ix1 = reg_to_node_map.get(r1).unwrap();
                        let ix2 = reg_to_node_map.get(r2).unwrap();

                        // If r1 and r2 are the same, the MOVE instruction can be safely removed,
                        // i.e., not added to reduced_ops
                        if r1 == r2 {
                            continue;
                        }

                        // If r1 and r2 are connected in the interference graph (i.e. their
                        // respective liveness ranges overalp), preserve the MOVE instruction by
                        // adding it to reduced_ops
                        if interference_graph.contains_edge(*ix1, *ix2) {
                            reduced_ops.push(op.clone());
                            continue;
                        }

                        // The MOVE instruction can now be safely removed. That is, we simply don't
                        // add it to the reduced_ops vector. Also, we combine the two nodes ix1 and
                        // ix2 in the graph by creating a new node that inherits the edges of both
                        // ix1 and ix2, and then we remove ix1 and ix2 from the graph. We also have
                        // to do some bookkeeping.
                        //
                        // Note that because the interference graph is of type StableGraph, the
                        // node index corresponding to each virtual register does not change when
                        // some graph nodes are added or removed.

                        // Create a new virtual register to represent the result of coalescing of
                        // r1 and r2. Then create a node for it in the graph
                        let new_reg = register_sequencer.next();
                        let new_ix = interference_graph.add_node(new_reg.clone());

                        // Add all of ix1(r1)'s edges
                        for neighbor in interference_graph.neighbors(*ix1).collect::<Vec<_>>() {
                            interference_graph.add_edge(neighbor, new_ix, ());
                        }

                        // Add all of ix2(r2)'s edges
                        for neighbor in interference_graph.neighbors(*ix2).collect::<Vec<_>>() {
                            if !interference_graph.contains_edge(neighbor, new_ix) {
                                interference_graph.add_edge(neighbor, new_ix, ());
                            }
                        }

                        // Now remove ix1(r1) and ix2(r2)
                        interference_graph.remove_node(*ix1);
                        interference_graph.remove_node(*ix2);

                        // Update the register maps
                        reg_to_node_map.insert(new_reg.clone(), new_ix);
                        reg_to_node_map.insert(r1.clone(), new_ix);
                        reg_to_node_map.insert(r2.clone(), new_ix);
                        reg_to_reg_map.insert(r1.clone(), new_reg.clone());
                        reg_to_reg_map.insert(r2.clone(), new_reg.clone());
                    }
                    _ => {
                        // Preserve the MOVE instruction if either registers used in the MOVE is
                        // special registers (i.e. *not* a VirtualRegister::Virtual(_))
                        reduced_ops.push(op.clone());
                    }
                }
            }
            _ => {
                // Preserve all other instructions
                reduced_ops.push(op.clone());
            }
        }
    }

    // Create a *final* reg-to-reg map that We keep looking for mappings within reg_to_reg_map
    // until we find a register that doesn't map to any other.
    let mut final_reg_to_reg_map: HashMap<VirtualRegister, VirtualRegister> = HashMap::new();
    for reg in reg_to_reg_map.keys() {
        let mut temp = reg;
        while let Some(t) = reg_to_reg_map.get(temp) {
            temp = t;
        }
        final_reg_to_reg_map.insert(reg.clone(), temp.clone());
    }

    // Update the registers for all instructions using final_reg_to_reg_map
    for new_op in &mut reduced_ops {
        *new_op = new_op.update_register(&final_reg_to_reg_map);
    }

    reduced_ops
}

/// Given an interference graph and a integer k, figure out if the graph k-colorable. Graph
/// coloring is an NP-complete problem, but the algorithm below is a simple stack based
/// approximation that relies on the fact that any node n in the graph that has fewer than k
/// neighbors can always be colored.
///
/// Algorithm:
/// ===============================================================================================
/// 1. Pick any node n such that degree(n) < k and put it on the stack along with its neighbors.
/// 2. Remove node n and all its edges from the graph
///    - This may make some new nodes have fewer than k neighbours which is nice.
/// 3. If some vertex n still has k or more neighbors, then the graph is not k colorable, and we
///    have to spill. For now, we will just error out, but we may be able to do better in the
///    future.
/// ===============================================================================================
///
/// As we don't implement spilling just yet, I've modified the algorithm above to assume k=infinity
/// and moving the colorability checking until the register assignment phase. The reason for this
/// is that the algorithm above can be too conservative and may bail our early even though a valid
/// assignment is actually available. We can revisit this decision when decide to implement
/// spilling.
///
pub(crate) fn color_interference_graph(
    interference_graph: &mut InterferenceGraph,
) -> Vec<(VirtualRegister, BTreeSet<VirtualRegister>)> {
    let mut stack: Vec<(VirtualRegister, BTreeSet<VirtualRegister>)> = vec![];

    while let Some(node) = interference_graph.node_indices().next() {
        let neighbors = interference_graph
            .neighbors(node)
            .map(|n| interference_graph[n].clone())
            .collect();
        stack.push((
            interference_graph
                .remove_node(node)
                .expect("Node must exist"),
            neighbors,
        ));
    }

    stack
}

/// Use the stack generated by the coloring algorithm to figure out a register assignment for each
/// virtual register. The idea here is to successively pop the stack while selecting a register to
/// each virtual register. A register r is available to a virtual register v if the intersection of
/// the neighbors of v (available from the stack) and the list of virtual registers already used by
/// r (available in the used_by field) is empty.
///
pub(crate) fn assign_registers(
    stack: &mut Vec<(VirtualRegister, BTreeSet<VirtualRegister>)>,
) -> RegisterPool {
    let mut pool = RegisterPool::init();
    while let Some((reg, neighbors)) = stack.pop() {
        if matches!(reg, VirtualRegister::Virtual(_)) {
            let available =
                pool.registers
                    .iter_mut()
                    .find(|RegisterAllocationStatus { reg: _, used_by }| {
                        neighbors.intersection(used_by).count() == 0
                    });

            if let Some(RegisterAllocationStatus { reg: _, used_by }) = available {
                used_by.insert(reg.clone());
            } else {
                // Error out for now if no available register is found
                unimplemented!(
                    "The allocator cannot resolve a register mapping for this program. \
                     This is a temporary artifact of the extremely early stage version \
                     of this language. Try to lower the number of variables you use."
                );
            }
        }
    }

    pool
}