Skip to main content

luadec_rust/lua51/
dominator.rs

1use crate::lua51::cfg::{ControlFlowGraph, EdgeKind};
2
3/// Dominator tree for a control flow graph.
4///
5/// Uses the iterative data-flow algorithm (Cooper, Harvey, Kennedy):
6/// simple, correct, and fast enough for the small CFGs produced by Lua functions.
7#[derive(Debug)]
8pub struct DominatorTree {
9    /// Immediate dominator of each block. `idom[entry] == entry`.
10    idom: Vec<usize>,
11    /// Children in the dominator tree (blocks immediately dominated).
12    children: Vec<Vec<usize>>,
13    /// Dominance frontier for each block.
14    frontier: Vec<Vec<usize>>,
15    /// Number of blocks.
16    num_blocks: usize,
17}
18
19impl DominatorTree {
20    /// Build the dominator tree from a CFG. Entry block is assumed to be block 0.
21    pub fn build(cfg: &ControlFlowGraph) -> Self {
22        let n = cfg.num_blocks();
23        if n == 0 {
24            return DominatorTree {
25                idom: Vec::new(),
26                children: Vec::new(),
27                frontier: Vec::new(),
28                num_blocks: 0,
29            };
30        }
31
32        // Compute reverse postorder (RPO) numbering via DFS.
33        let rpo = compute_rpo(cfg, n);
34        let rpo_order = &rpo.order; // block IDs in RPO
35        let rpo_num = &rpo.number; // block ID -> RPO number
36
37        // Iterative dominator computation (Cooper, Harvey, Kennedy 2001).
38        const UNDEF: usize = usize::MAX;
39        let mut idom = vec![UNDEF; n];
40        idom[0] = 0; // entry dominates itself
41
42        let intersect = |idom: &[usize], rpo_num: &[usize], mut b1: usize, mut b2: usize| -> usize {
43            while b1 != b2 {
44                while rpo_num[b1] > rpo_num[b2] {
45                    b1 = idom[b1];
46                }
47                while rpo_num[b2] > rpo_num[b1] {
48                    b2 = idom[b2];
49                }
50            }
51            b1
52        };
53
54        let mut changed = true;
55        while changed {
56            changed = false;
57            // Process in RPO, skipping entry (index 0 in rpo_order).
58            for &b in &rpo_order[1..] {
59                let preds = &cfg.blocks[b].predecessors;
60                // Find first processed predecessor.
61                let mut new_idom = UNDEF;
62                for &p in preds {
63                    if idom[p] != UNDEF {
64                        new_idom = p;
65                        break;
66                    }
67                }
68                if new_idom == UNDEF {
69                    continue; // unreachable block
70                }
71                // Intersect with remaining processed predecessors.
72                for &p in preds {
73                    if p != new_idom && idom[p] != UNDEF {
74                        new_idom = intersect(&idom, rpo_num, p, new_idom);
75                    }
76                }
77                if idom[b] != new_idom {
78                    idom[b] = new_idom;
79                    changed = true;
80                }
81            }
82        }
83
84        // Build children lists.
85        let mut children = vec![Vec::new(); n];
86        for b in 0..n {
87            if idom[b] != b && idom[b] != UNDEF {
88                children[idom[b]].push(b);
89            }
90        }
91
92        // Compute dominance frontiers.
93        let frontier = compute_dominance_frontiers(&idom, cfg, n);
94
95        DominatorTree {
96            idom,
97            children,
98            frontier,
99            num_blocks: n,
100        }
101    }
102
103    /// Immediate dominator of block `b`.
104    pub fn idom(&self, b: usize) -> usize {
105        self.idom[b]
106    }
107
108    /// Children of block `b` in the dominator tree.
109    pub fn children(&self, b: usize) -> &[usize] {
110        &self.children[b]
111    }
112
113    /// Dominance frontier of block `b`.
114    pub fn frontier(&self, b: usize) -> &[usize] {
115        &self.frontier[b]
116    }
117
118    /// Check if block `a` dominates block `b`.
119    pub fn dominates(&self, a: usize, b: usize) -> bool {
120        const UNDEF: usize = usize::MAX;
121        let mut cur = b;
122        loop {
123            if cur == a {
124                return true;
125            }
126            let idom = self.idom[cur];
127            if idom == UNDEF || idom == cur {
128                return false; // unreachable or reached entry without finding a
129            }
130            cur = idom;
131        }
132    }
133
134    /// Number of blocks.
135    pub fn num_blocks(&self) -> usize {
136        self.num_blocks
137    }
138}
139
140impl std::fmt::Display for DominatorTree {
141    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
142        writeln!(f, "Dominator Tree:")?;
143        for b in 0..self.num_blocks {
144            writeln!(
145                f,
146                "  B{}: idom=B{}, children={:?}, frontier={:?}",
147                b, self.idom[b], self.children[b], self.frontier[b]
148            )?;
149        }
150        Ok(())
151    }
152}
153
154// --- Natural loop detection ---
155
156/// A natural loop identified from back-edges in the CFG.
157#[derive(Debug, Clone)]
158pub struct NaturalLoop {
159    /// The loop header block (target of back-edge, dominates all loop blocks).
160    pub header: usize,
161    /// The block containing the back-edge to the header.
162    pub latch: usize,
163    /// All blocks in the loop body (including header and latch).
164    pub body: Vec<usize>,
165    /// The kind of loop, if identifiable from Lua patterns.
166    pub kind: LoopKind,
167}
168
169/// Classification of Lua loop constructs.
170#[derive(Debug, Clone, Copy, PartialEq, Eq)]
171pub enum LoopKind {
172    /// `for i = init, limit, step do ... end` (FORPREP/FORLOOP)
173    NumericFor,
174    /// `for k, v in iter do ... end` (TFORLOOP)
175    GenericFor,
176    /// `while cond do ... end` or `repeat ... until cond`
177    WhileRepeat,
178}
179
180/// Find all natural loops in the CFG using dominator information.
181///
182/// A back-edge is an edge N -> H where H dominates N. Each back-edge
183/// defines a natural loop whose body is the set of blocks that can reach N
184/// without going through H.
185pub fn find_loops(cfg: &ControlFlowGraph, dom: &DominatorTree) -> Vec<NaturalLoop> {
186    let mut loops = Vec::new();
187
188    for edge in &cfg.edges {
189        // A back-edge: edge.to dominates edge.from
190        if dom.dominates(edge.to, edge.from) {
191            let header = edge.to;
192            let latch = edge.from;
193
194            // Collect loop body: all blocks that can reach `latch` without
195            // going through `header`.
196            let body = collect_loop_body(cfg, header, latch);
197
198            // Classify the loop kind based on the edge type and instructions.
199            let kind = classify_loop(cfg, edge.kind, header);
200
201            loops.push(NaturalLoop {
202                header,
203                latch,
204                body,
205                kind,
206            });
207        }
208    }
209
210    // Sort loops by header for deterministic ordering.
211    loops.sort_by_key(|l| (l.header, l.latch));
212    loops
213}
214
215/// Collect all blocks in the natural loop defined by the back-edge latch -> header.
216fn collect_loop_body(cfg: &ControlFlowGraph, header: usize, latch: usize) -> Vec<usize> {
217    let mut body = vec![false; cfg.num_blocks()];
218    body[header] = true;
219
220    if header == latch {
221        return vec![header];
222    }
223
224    body[latch] = true;
225    let mut stack = vec![latch];
226
227    // Work backwards from latch, adding predecessors until we reach header.
228    while let Some(b) = stack.pop() {
229        for &pred in &cfg.blocks[b].predecessors {
230            if !body[pred] {
231                body[pred] = true;
232                stack.push(pred);
233            }
234        }
235    }
236
237    body.iter()
238        .enumerate()
239        .filter_map(|(i, &in_loop)| if in_loop { Some(i) } else { None })
240        .collect()
241}
242
243/// Classify a loop based on the back-edge kind and header instructions.
244fn classify_loop(cfg: &ControlFlowGraph, edge_kind: EdgeKind, header: usize) -> LoopKind {
245    match edge_kind {
246        EdgeKind::ForLoopBack => LoopKind::NumericFor,
247        EdgeKind::TForLoopBack => LoopKind::GenericFor,
248        _ => {
249            // Check if header block contains FORLOOP or TFORLOOP
250            let block = &cfg.blocks[header];
251            use crate::lua51::opcodes::OpCode;
252            for pc in block.start..=block.end {
253                match cfg.instructions[pc].op {
254                    OpCode::ForLoop => return LoopKind::NumericFor,
255                    OpCode::TForLoop => return LoopKind::GenericFor,
256                    _ => {}
257                }
258            }
259            LoopKind::WhileRepeat
260        }
261    }
262}
263
264// --- RPO computation ---
265
266struct Rpo {
267    /// Block IDs in reverse postorder.
268    order: Vec<usize>,
269    /// RPO number for each block ID.
270    number: Vec<usize>,
271}
272
273fn compute_rpo(cfg: &ControlFlowGraph, n: usize) -> Rpo {
274    let mut visited = vec![false; n];
275    let mut postorder = Vec::with_capacity(n);
276
277    fn dfs(
278        b: usize,
279        cfg: &ControlFlowGraph,
280        visited: &mut Vec<bool>,
281        postorder: &mut Vec<usize>,
282    ) {
283        visited[b] = true;
284        for &succ in &cfg.blocks[b].successors {
285            if !visited[succ] {
286                dfs(succ, cfg, visited, postorder);
287            }
288        }
289        postorder.push(b);
290    }
291
292    dfs(0, cfg, &mut visited, &mut postorder);
293
294    // Reverse postorder
295    postorder.reverse();
296    let mut number = vec![0usize; n];
297    for (i, &b) in postorder.iter().enumerate() {
298        number[b] = i;
299    }
300
301    Rpo {
302        order: postorder,
303        number,
304    }
305}
306
307/// Compute dominance frontiers using the algorithm from
308/// Cooper, Harvey, Kennedy (2001).
309fn compute_dominance_frontiers(idom: &[usize], cfg: &ControlFlowGraph, n: usize) -> Vec<Vec<usize>> {
310    let mut frontiers = vec![Vec::new(); n];
311
312    for b in 0..n {
313        let preds = &cfg.blocks[b].predecessors;
314        if preds.len() >= 2 {
315            for &p in preds {
316                let mut runner = p;
317                while runner != idom[b] && runner != usize::MAX {
318                    if !frontiers[runner].contains(&b) {
319                        frontiers[runner].push(b);
320                    }
321                    if runner == idom[runner] {
322                        break;
323                    }
324                    runner = idom[runner];
325                }
326            }
327        }
328    }
329
330    frontiers
331}
332
333#[cfg(test)]
334mod tests {
335    use super::*;
336    use crate::lua51::cfg::ControlFlowGraph;
337    use crate::lua51::instruction::{encode_abc, encode_abx, encode_asbx};
338    use crate::lua51::opcodes::OpCode;
339
340    #[test]
341    fn dominator_linear() {
342        // Linear code: single block, entry dominates itself
343        let code = vec![
344            encode_abx(OpCode::LoadK, 0, 0),
345            encode_abc(OpCode::Return, 0, 1, 0),
346        ];
347        let cfg = ControlFlowGraph::build(&code);
348        let dom = DominatorTree::build(&cfg);
349        assert_eq!(dom.idom(0), 0);
350        assert!(dom.dominates(0, 0));
351    }
352
353    #[test]
354    fn dominator_diamond() {
355        // Diamond CFG:
356        // [0] EQ 0 0 1      block 0: [0,1]
357        // [1] JMP +1         -> block 2 (false), block 1 (true)
358        // [2] LOADK 2 0      block 1: [2]
359        // [3] LOADK 3 0      block 2: [3]
360        // [4] RETURN 0 1     block 2 continues: [3,4] -- wait, 3 is a leader
361        //
362        // Actually let me lay this out more carefully:
363        // [0] EQ 0 0 1       -- test
364        // [1] JMP +2         -- if false, jump to [4]
365        // [2] LOADK 2 0      -- true path
366        // [3] JMP +0         -- jump to [4]
367        // [4] RETURN 0 1     -- merge point
368        let code = vec![
369            encode_abc(OpCode::Eq, 0, 0, 1),
370            encode_asbx(OpCode::Jmp, 0, 2),
371            encode_abx(OpCode::LoadK, 2, 0),
372            encode_asbx(OpCode::Jmp, 0, 0),
373            encode_abc(OpCode::Return, 0, 1, 0),
374        ];
375        let cfg = ControlFlowGraph::build(&code);
376        let dom = DominatorTree::build(&cfg);
377
378        // Block 0 dominates all blocks
379        for b in 0..cfg.num_blocks() {
380            assert!(dom.dominates(0, b), "block 0 should dominate block {}", b);
381        }
382    }
383
384    #[test]
385    fn detect_numeric_for_loop() {
386        // [0] LOADK 0 0      -- init
387        // [1] LOADK 1 1      -- limit
388        // [2] LOADK 2 2      -- step
389        // [3] FORPREP 0 +1   -- jump to [5]
390        // [4] LOADK 4 0      -- loop body
391        // [5] FORLOOP 0 -2   -- back to [4]
392        // [6] RETURN 0 1
393        let code = vec![
394            encode_abx(OpCode::LoadK, 0, 0),
395            encode_abx(OpCode::LoadK, 1, 1),
396            encode_abx(OpCode::LoadK, 2, 2),
397            encode_asbx(OpCode::ForPrep, 0, 1),
398            encode_abx(OpCode::LoadK, 4, 0),
399            encode_asbx(OpCode::ForLoop, 0, -2),
400            encode_abc(OpCode::Return, 0, 1, 0),
401        ];
402        let cfg = ControlFlowGraph::build(&code);
403        let dom = DominatorTree::build(&cfg);
404        let loops = find_loops(&cfg, &dom);
405
406        assert_eq!(loops.len(), 1);
407        assert_eq!(loops[0].kind, LoopKind::NumericFor);
408        assert!(loops[0].body.len() >= 1);
409    }
410
411    #[test]
412    fn detect_while_loop() {
413        // Simple while-style loop:
414        // [0] EQ 0 0 1       -- while condition test
415        // [1] JMP +2         -- if false, exit to [4]
416        // [2] LOADK 2 0      -- loop body
417        // [3] JMP -4         -- jump back to [0]
418        // [4] RETURN 0 1
419        let code = vec![
420            encode_abc(OpCode::Eq, 0, 0, 1),
421            encode_asbx(OpCode::Jmp, 0, 2),
422            encode_abx(OpCode::LoadK, 2, 0),
423            encode_asbx(OpCode::Jmp, 0, -4),
424            encode_abc(OpCode::Return, 0, 1, 0),
425        ];
426        let cfg = ControlFlowGraph::build(&code);
427        let dom = DominatorTree::build(&cfg);
428        let loops = find_loops(&cfg, &dom);
429
430        assert_eq!(loops.len(), 1);
431        assert_eq!(loops[0].kind, LoopKind::WhileRepeat);
432    }
433
434    #[test]
435    fn empty_cfg_dominator() {
436        let cfg = ControlFlowGraph::build(&[]);
437        let dom = DominatorTree::build(&cfg);
438        assert_eq!(dom.num_blocks(), 0);
439        let loops = find_loops(&cfg, &dom);
440        assert!(loops.is_empty());
441    }
442}