Skip to main content

luadec_rust/lua51/
dominator.rs

1use crate::lua51::cfg::{ControlFlowGraph, EdgeKind};
2
3/// Dominator tree for a control flow graph.
4///
5/// Uses the iterative data-flow algorithm (Cooper, Harvey, Kennedy):
6/// simple, correct, and fast enough for the small CFGs produced by Lua functions.
7#[derive(Debug)]
8pub struct DominatorTree {
9    /// Immediate dominator of each block. `idom[entry] == entry`.
10    idom: Vec<usize>,
11    /// Children in the dominator tree (blocks immediately dominated).
12    children: Vec<Vec<usize>>,
13    /// Dominance frontier for each block.
14    frontier: Vec<Vec<usize>>,
15    /// Number of blocks.
16    num_blocks: usize,
17}
18
19impl DominatorTree {
20    /// Build the dominator tree from a CFG. Entry block is assumed to be block 0.
21    pub fn build(cfg: &ControlFlowGraph) -> Self {
22        let n = cfg.num_blocks();
23        if n == 0 {
24            return DominatorTree {
25                idom: Vec::new(),
26                children: Vec::new(),
27                frontier: Vec::new(),
28                num_blocks: 0,
29            };
30        }
31
32        // Compute reverse postorder (RPO) numbering via DFS.
33        let rpo = compute_rpo(cfg, n);
34        let rpo_order = &rpo.order; // block IDs in RPO
35        let rpo_num = &rpo.number; // block ID -> RPO number
36
37        // Iterative dominator computation (Cooper, Harvey, Kennedy 2001).
38        const UNDEF: usize = usize::MAX;
39        let mut idom = vec![UNDEF; n];
40        idom[0] = 0; // entry dominates itself
41
42        let intersect = |idom: &[usize], rpo_num: &[usize], mut b1: usize, mut b2: usize| -> usize {
43            while b1 != b2 {
44                while rpo_num[b1] > rpo_num[b2] {
45                    b1 = idom[b1];
46                }
47                while rpo_num[b2] > rpo_num[b1] {
48                    b2 = idom[b2];
49                }
50            }
51            b1
52        };
53
54        let mut changed = true;
55        while changed {
56            changed = false;
57            // Process in RPO, skipping entry (index 0 in rpo_order).
58            for &b in &rpo_order[1..] {
59                let preds = &cfg.blocks[b].predecessors;
60                // Find first processed predecessor.
61                let mut new_idom = UNDEF;
62                for &p in preds {
63                    if idom[p] != UNDEF {
64                        new_idom = p;
65                        break;
66                    }
67                }
68                if new_idom == UNDEF {
69                    continue; // unreachable block
70                }
71                // Intersect with remaining processed predecessors.
72                for &p in preds {
73                    if p != new_idom && idom[p] != UNDEF {
74                        new_idom = intersect(&idom, rpo_num, p, new_idom);
75                    }
76                }
77                if idom[b] != new_idom {
78                    idom[b] = new_idom;
79                    changed = true;
80                }
81            }
82        }
83
84        // Build children lists.
85        let mut children = vec![Vec::new(); n];
86        for b in 0..n {
87            if idom[b] != b && idom[b] != UNDEF {
88                children[idom[b]].push(b);
89            }
90        }
91
92        // Compute dominance frontiers.
93        let frontier = compute_dominance_frontiers(&idom, cfg, n);
94
95        DominatorTree {
96            idom,
97            children,
98            frontier,
99            num_blocks: n,
100        }
101    }
102
103    /// Immediate dominator of block `b`.
104    pub fn idom(&self, b: usize) -> usize {
105        self.idom[b]
106    }
107
108    /// Children of block `b` in the dominator tree.
109    pub fn children(&self, b: usize) -> &[usize] {
110        &self.children[b]
111    }
112
113    /// Dominance frontier of block `b`.
114    pub fn frontier(&self, b: usize) -> &[usize] {
115        &self.frontier[b]
116    }
117
118    /// Check if block `a` dominates block `b`.
119    pub fn dominates(&self, a: usize, b: usize) -> bool {
120        const UNDEF: usize = usize::MAX;
121        let mut cur = b;
122        loop {
123            if cur == a {
124                return true;
125            }
126            let idom = self.idom[cur];
127            if idom == UNDEF || idom == cur {
128                return false; // unreachable or reached entry without finding a
129            }
130            cur = idom;
131        }
132    }
133
134    /// Number of blocks.
135    pub fn num_blocks(&self) -> usize {
136        self.num_blocks
137    }
138}
139
140impl std::fmt::Display for DominatorTree {
141    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
142        writeln!(f, "Dominator Tree:")?;
143        for b in 0..self.num_blocks {
144            writeln!(
145                f,
146                "  B{}: idom=B{}, children={:?}, frontier={:?}",
147                b, self.idom[b], self.children[b], self.frontier[b]
148            )?;
149        }
150        Ok(())
151    }
152}
153
154// --- Natural loop detection ---
155
156/// A natural loop identified from back-edges in the CFG.
157#[derive(Debug, Clone)]
158pub struct NaturalLoop {
159    /// The loop header block (target of back-edge, dominates all loop blocks).
160    pub header: usize,
161    /// The block containing the back-edge to the header.
162    pub latch: usize,
163    /// All blocks in the loop body (including header and latch).
164    pub body: Vec<usize>,
165    /// The kind of loop, if identifiable from Lua patterns.
166    pub kind: LoopKind,
167}
168
169/// Classification of Lua loop constructs.
170#[derive(Debug, Clone, Copy, PartialEq, Eq)]
171pub enum LoopKind {
172    /// `for i = init, limit, step do ... end` (FORPREP/FORLOOP)
173    NumericFor,
174    /// `for k, v in iter do ... end` (TFORLOOP)
175    GenericFor,
176    /// `while cond do ... end` or `repeat ... until cond`
177    WhileRepeat,
178}
179
180/// Find all natural loops in the CFG using dominator information.
181///
182/// A back-edge is an edge N -> H where H dominates N. Each back-edge
183/// defines a natural loop whose body is the set of blocks that can reach N
184/// without going through H.
185pub fn find_loops(cfg: &ControlFlowGraph, dom: &DominatorTree) -> Vec<NaturalLoop> {
186    let mut loops = Vec::new();
187
188    for edge in &cfg.edges {
189        // A back-edge: edge.to dominates edge.from
190        if dom.dominates(edge.to, edge.from) {
191            let header = edge.to;
192            let latch = edge.from;
193
194            // Collect loop body: all blocks that can reach `latch` without
195            // going through `header`.
196            let body = collect_loop_body(cfg, header, latch);
197
198            // Classify the loop kind based on the edge type and instructions.
199            let kind = classify_loop(cfg, edge.kind, header);
200
201            loops.push(NaturalLoop {
202                header,
203                latch,
204                body,
205                kind,
206            });
207        }
208    }
209
210    // Sort loops by header for deterministic ordering.
211    loops.sort_by_key(|l| (l.header, l.latch));
212    loops
213}
214
215/// Collect all blocks in the natural loop defined by the back-edge latch -> header.
216fn collect_loop_body(cfg: &ControlFlowGraph, header: usize, latch: usize) -> Vec<usize> {
217    let mut body = vec![false; cfg.num_blocks()];
218    body[header] = true;
219
220    if header == latch {
221        return vec![header];
222    }
223
224    body[latch] = true;
225    let mut stack = vec![latch];
226
227    // Work backwards from latch, adding predecessors until we reach header.
228    while let Some(b) = stack.pop() {
229        for &pred in &cfg.blocks[b].predecessors {
230            if !body[pred] {
231                body[pred] = true;
232                stack.push(pred);
233            }
234        }
235    }
236
237    body.iter()
238        .enumerate()
239        .filter_map(|(i, &in_loop)| if in_loop { Some(i) } else { None })
240        .collect()
241}
242
243/// Classify a loop based on the back-edge kind and header instructions.
244fn classify_loop(cfg: &ControlFlowGraph, edge_kind: EdgeKind, header: usize) -> LoopKind {
245    match edge_kind {
246        EdgeKind::ForLoopBack => LoopKind::NumericFor,
247        EdgeKind::TForLoopBack => LoopKind::GenericFor,
248        _ => {
249            // Check if header block ends with FORLOOP
250            let block = &cfg.blocks[header];
251            let last_inst = &cfg.instructions[block.end];
252            use crate::lua51::opcodes::OpCode;
253            match last_inst.op {
254                OpCode::ForLoop => LoopKind::NumericFor,
255                _ => LoopKind::WhileRepeat,
256            }
257        }
258    }
259}
260
261// --- RPO computation ---
262
263struct Rpo {
264    /// Block IDs in reverse postorder.
265    order: Vec<usize>,
266    /// RPO number for each block ID.
267    number: Vec<usize>,
268}
269
270fn compute_rpo(cfg: &ControlFlowGraph, n: usize) -> Rpo {
271    let mut visited = vec![false; n];
272    let mut postorder = Vec::with_capacity(n);
273
274    fn dfs(
275        b: usize,
276        cfg: &ControlFlowGraph,
277        visited: &mut Vec<bool>,
278        postorder: &mut Vec<usize>,
279    ) {
280        visited[b] = true;
281        for &succ in &cfg.blocks[b].successors {
282            if !visited[succ] {
283                dfs(succ, cfg, visited, postorder);
284            }
285        }
286        postorder.push(b);
287    }
288
289    dfs(0, cfg, &mut visited, &mut postorder);
290
291    // Reverse postorder
292    postorder.reverse();
293    let mut number = vec![0usize; n];
294    for (i, &b) in postorder.iter().enumerate() {
295        number[b] = i;
296    }
297
298    Rpo {
299        order: postorder,
300        number,
301    }
302}
303
304/// Compute dominance frontiers using the algorithm from
305/// Cooper, Harvey, Kennedy (2001).
306fn compute_dominance_frontiers(idom: &[usize], cfg: &ControlFlowGraph, n: usize) -> Vec<Vec<usize>> {
307    let mut frontiers = vec![Vec::new(); n];
308
309    for b in 0..n {
310        let preds = &cfg.blocks[b].predecessors;
311        if preds.len() >= 2 {
312            for &p in preds {
313                let mut runner = p;
314                while runner != idom[b] && runner != usize::MAX {
315                    if !frontiers[runner].contains(&b) {
316                        frontiers[runner].push(b);
317                    }
318                    if runner == idom[runner] {
319                        break;
320                    }
321                    runner = idom[runner];
322                }
323            }
324        }
325    }
326
327    frontiers
328}
329
330#[cfg(test)]
331mod tests {
332    use super::*;
333    use crate::lua51::cfg::ControlFlowGraph;
334    use crate::lua51::instruction::{encode_abc, encode_abx, encode_asbx};
335    use crate::lua51::opcodes::OpCode;
336
337    #[test]
338    fn dominator_linear() {
339        // Linear code: single block, entry dominates itself
340        let code = vec![
341            encode_abx(OpCode::LoadK, 0, 0),
342            encode_abc(OpCode::Return, 0, 1, 0),
343        ];
344        let cfg = ControlFlowGraph::build(&code);
345        let dom = DominatorTree::build(&cfg);
346        assert_eq!(dom.idom(0), 0);
347        assert!(dom.dominates(0, 0));
348    }
349
350    #[test]
351    fn dominator_diamond() {
352        // Diamond CFG:
353        // [0] EQ 0 0 1      block 0: [0,1]
354        // [1] JMP +1         -> block 2 (false), block 1 (true)
355        // [2] LOADK 2 0      block 1: [2]
356        // [3] LOADK 3 0      block 2: [3]
357        // [4] RETURN 0 1     block 2 continues: [3,4] -- wait, 3 is a leader
358        //
359        // Actually let me lay this out more carefully:
360        // [0] EQ 0 0 1       -- test
361        // [1] JMP +2         -- if false, jump to [4]
362        // [2] LOADK 2 0      -- true path
363        // [3] JMP +0         -- jump to [4]
364        // [4] RETURN 0 1     -- merge point
365        let code = vec![
366            encode_abc(OpCode::Eq, 0, 0, 1),
367            encode_asbx(OpCode::Jmp, 0, 2),
368            encode_abx(OpCode::LoadK, 2, 0),
369            encode_asbx(OpCode::Jmp, 0, 0),
370            encode_abc(OpCode::Return, 0, 1, 0),
371        ];
372        let cfg = ControlFlowGraph::build(&code);
373        let dom = DominatorTree::build(&cfg);
374
375        // Block 0 dominates all blocks
376        for b in 0..cfg.num_blocks() {
377            assert!(dom.dominates(0, b), "block 0 should dominate block {}", b);
378        }
379    }
380
381    #[test]
382    fn detect_numeric_for_loop() {
383        // [0] LOADK 0 0      -- init
384        // [1] LOADK 1 1      -- limit
385        // [2] LOADK 2 2      -- step
386        // [3] FORPREP 0 +1   -- jump to [5]
387        // [4] LOADK 4 0      -- loop body
388        // [5] FORLOOP 0 -2   -- back to [4]
389        // [6] RETURN 0 1
390        let code = vec![
391            encode_abx(OpCode::LoadK, 0, 0),
392            encode_abx(OpCode::LoadK, 1, 1),
393            encode_abx(OpCode::LoadK, 2, 2),
394            encode_asbx(OpCode::ForPrep, 0, 1),
395            encode_abx(OpCode::LoadK, 4, 0),
396            encode_asbx(OpCode::ForLoop, 0, -2),
397            encode_abc(OpCode::Return, 0, 1, 0),
398        ];
399        let cfg = ControlFlowGraph::build(&code);
400        let dom = DominatorTree::build(&cfg);
401        let loops = find_loops(&cfg, &dom);
402
403        assert_eq!(loops.len(), 1);
404        assert_eq!(loops[0].kind, LoopKind::NumericFor);
405        assert!(loops[0].body.len() >= 1);
406    }
407
408    #[test]
409    fn detect_while_loop() {
410        // Simple while-style loop:
411        // [0] EQ 0 0 1       -- while condition test
412        // [1] JMP +2         -- if false, exit to [4]
413        // [2] LOADK 2 0      -- loop body
414        // [3] JMP -4         -- jump back to [0]
415        // [4] RETURN 0 1
416        let code = vec![
417            encode_abc(OpCode::Eq, 0, 0, 1),
418            encode_asbx(OpCode::Jmp, 0, 2),
419            encode_abx(OpCode::LoadK, 2, 0),
420            encode_asbx(OpCode::Jmp, 0, -4),
421            encode_abc(OpCode::Return, 0, 1, 0),
422        ];
423        let cfg = ControlFlowGraph::build(&code);
424        let dom = DominatorTree::build(&cfg);
425        let loops = find_loops(&cfg, &dom);
426
427        assert_eq!(loops.len(), 1);
428        assert_eq!(loops[0].kind, LoopKind::WhileRepeat);
429    }
430
431    #[test]
432    fn empty_cfg_dominator() {
433        let cfg = ControlFlowGraph::build(&[]);
434        let dom = DominatorTree::build(&cfg);
435        assert_eq!(dom.num_blocks(), 0);
436        let loops = find_loops(&cfg, &dom);
437        assert!(loops.is_empty());
438    }
439}