Skip to main content

jetro_core/
cfg.rs

1//! Control-flow graph for v2 programs.
2//!
3//! v2 opcodes are linear within a program, but sub-programs (inside
4//! `AndOp`, `OrOp`, `CoalesceOp`, `FilterMap`, etc.) introduce
5//! conditional / looped branches.  A `BasicBlock` holds a contiguous
6//! non-branching op slice; a `CFG` is a tree of blocks joined by
7//! typed edges.
8//!
9//! This is enough structure for classic analyses (liveness, dominators)
10//! to operate on — v2's restricted branch set means reducible CFGs only.
11
12use std::sync::Arc;
13use std::collections::HashSet;
14use super::vm::{Program, Opcode};
15
16/// A basic block: a straight-line opcode slice with no inner branches.
17/// `branches` lists the sub-program blocks reached at the block's end.
18#[derive(Debug, Clone)]
19pub struct BasicBlock {
20    pub id:       usize,
21    pub ops:      Vec<Opcode>,
22    pub branches: Vec<EdgeKind>,
23}
24
25/// Edge between blocks describing how control flows.
26#[derive(Debug, Clone)]
27pub enum EdgeKind {
28    /// Conditional short-circuit: `AndOp` / `OrOp` — right side executed
29    /// only when left is truthy / falsy respectively.
30    ShortCircuit { target: usize, condition: Condition },
31    /// Null-coalesce branch taken when left is null.
32    Coalesce { target: usize },
33    /// Per-element loop (filter / map body).
34    Loop { target: usize, name: &'static str },
35    /// Destructure / bind — always taken, introduces new scope.
36    Bind { target: usize },
37    /// Comprehension body.
38    Comp { target: usize, part: CompPart },
39}
40
41#[derive(Debug, Clone, Copy, PartialEq, Eq)]
42pub enum Condition { IfTruthy, IfFalsy, IfNull }
43
44#[derive(Debug, Clone, Copy, PartialEq, Eq)]
45pub enum CompPart { Expr, Iter, Cond, Key, Val }
46
47#[derive(Debug, Clone)]
48pub struct Cfg {
49    pub blocks: Vec<BasicBlock>,
50    pub entry:  usize,
51}
52
53impl Cfg {
54    /// Build a CFG from a compiled program.  Sub-programs become their
55    /// own blocks recursively; the returned `entry` is the root block id.
56    pub fn build(program: &Program) -> Cfg {
57        let mut cfg = Cfg { blocks: Vec::new(), entry: 0 };
58        cfg.entry = build_block(&mut cfg, &program.ops);
59        cfg
60    }
61
62    /// Number of basic blocks in the graph.
63    pub fn size(&self) -> usize { self.blocks.len() }
64
65    /// Reachable blocks from `entry` via BFS.
66    pub fn reachable(&self) -> Vec<usize> {
67        let mut visited = vec![false; self.blocks.len()];
68        let mut queue = vec![self.entry];
69        let mut out   = Vec::new();
70        while let Some(id) = queue.pop() {
71            if visited[id] { continue; }
72            visited[id] = true;
73            out.push(id);
74            for e in &self.blocks[id].branches {
75                let t = edge_target(e);
76                queue.push(t);
77            }
78        }
79        out
80    }
81
82    /// Compute immediate dominators by iterative dataflow (Cooper–Harvey–Kennedy).
83    pub fn dominators(&self) -> Vec<Option<usize>> {
84        let n = self.blocks.len();
85        let mut doms: Vec<Option<usize>> = vec![None; n];
86        if n == 0 { return doms; }
87        doms[self.entry] = Some(self.entry);
88        // Post-order traversal.
89        let order = self.reachable();
90        let mut changed = true;
91        while changed {
92            changed = false;
93            for &b in order.iter().rev() {
94                if b == self.entry { continue; }
95                // Find all predecessors.
96                let preds: Vec<usize> = (0..n).filter(|&p|
97                    self.blocks[p].branches.iter().any(|e| edge_target(e) == b)
98                ).collect();
99                let mut new_idom: Option<usize> = None;
100                for p in preds {
101                    if doms[p].is_none() { continue; }
102                    new_idom = Some(match new_idom {
103                        None => p,
104                        Some(cur) => intersect(&doms, p, cur),
105                    });
106                }
107                if doms[b] != new_idom {
108                    doms[b] = new_idom;
109                    changed = true;
110                }
111            }
112        }
113        doms
114    }
115
116    /// Predecessor list per block.
117    pub fn preds(&self) -> Vec<Vec<usize>> {
118        let n = self.blocks.len();
119        let mut p: Vec<Vec<usize>> = vec![Vec::new(); n];
120        for (bi, b) in self.blocks.iter().enumerate() {
121            for e in &b.branches { p[edge_target(e)].push(bi); }
122        }
123        p
124    }
125
126    /// Dominance frontier per block (Cytron et al.).
127    pub fn dominance_frontiers(&self) -> Vec<HashSet<usize>> {
128        let n = self.blocks.len();
129        let doms = self.dominators();
130        let preds = self.preds();
131        let mut df: Vec<HashSet<usize>> = vec![HashSet::new(); n];
132        for b in 0..n {
133            if preds[b].len() < 2 { continue; }
134            let Some(idom_b) = doms[b] else { continue };
135            for &p in &preds[b] {
136                let mut runner = p;
137                while Some(runner) != Some(idom_b) && doms[runner].is_some() {
138                    df[runner].insert(b);
139                    let next = doms[runner].unwrap();
140                    if next == runner { break; }
141                    runner = next;
142                }
143            }
144        }
145        df
146    }
147
148    /// Loop headers: blocks that dominate one of their own predecessors
149    /// (i.e. back-edges terminate here).  Returns (header, back_edge_source).
150    pub fn loop_headers(&self) -> Vec<(usize, usize)> {
151        let doms = self.dominators();
152        let preds = self.preds();
153        let mut out = Vec::new();
154        for (b, ps) in preds.iter().enumerate() {
155            for &p in ps {
156                if dominates(&doms, b, p) { out.push((b, p)); }
157            }
158        }
159        out
160    }
161}
162
163/// Does `a` dominate `b`?
164fn dominates(doms: &[Option<usize>], a: usize, mut b: usize) -> bool {
165    loop {
166        if a == b { return true; }
167        let Some(next) = doms[b] else { return false };
168        if next == b { return false; }
169        b = next;
170    }
171}
172
173fn edge_target(e: &EdgeKind) -> usize {
174    match e {
175        EdgeKind::ShortCircuit { target, .. }
176            | EdgeKind::Coalesce { target }
177            | EdgeKind::Loop { target, .. }
178            | EdgeKind::Bind { target }
179            | EdgeKind::Comp { target, .. } => *target,
180    }
181}
182
183fn intersect(doms: &[Option<usize>], mut a: usize, mut b: usize) -> usize {
184    while a != b {
185        while a > b { a = doms[a].unwrap_or(a); if a == doms[a].unwrap_or(a) && a != b { break; } }
186        while b > a { b = doms[b].unwrap_or(b); if b == doms[b].unwrap_or(b) && a != b { break; } }
187        if doms[a].map_or(true, |d| d == a) && doms[b].map_or(true, |d| d == b) { break; }
188    }
189    a
190}
191
192fn build_block(cfg: &mut Cfg, ops: &[Opcode]) -> usize {
193    let id = cfg.blocks.len();
194    cfg.blocks.push(BasicBlock { id, ops: Vec::new(), branches: Vec::new() });
195    let mut straight: Vec<Opcode> = Vec::new();
196    let mut branches: Vec<EdgeKind> = Vec::new();
197    for op in ops {
198        match op {
199            Opcode::AndOp(p) => {
200                let t = build_block(cfg, &p.ops);
201                branches.push(EdgeKind::ShortCircuit { target: t, condition: Condition::IfTruthy });
202                straight.push(op.clone());
203            }
204            Opcode::OrOp(p) => {
205                let t = build_block(cfg, &p.ops);
206                branches.push(EdgeKind::ShortCircuit { target: t, condition: Condition::IfFalsy });
207                straight.push(op.clone());
208            }
209            Opcode::CoalesceOp(p) => {
210                let t = build_block(cfg, &p.ops);
211                branches.push(EdgeKind::Coalesce { target: t });
212                straight.push(op.clone());
213            }
214            Opcode::InlineFilter(p) | Opcode::FilterCount(p)
215                | Opcode::FindFirst(p) | Opcode::FindOne(p)
216                | Opcode::MapSum(p) | Opcode::MapAvg(p)
217                | Opcode::MapFlatten(p) => {
218                let t = build_block(cfg, &p.ops);
219                branches.push(EdgeKind::Loop { target: t, name: "filter" });
220                straight.push(op.clone());
221            }
222            Opcode::FilterTakeWhile { pred, stop } => {
223                let tp = build_block(cfg, &pred.ops);
224                let ts = build_block(cfg, &stop.ops);
225                branches.push(EdgeKind::Loop { target: tp, name: "filter" });
226                branches.push(EdgeKind::Loop { target: ts, name: "stop" });
227                straight.push(op.clone());
228            }
229            Opcode::FilterMap { pred, map } => {
230                let tp = build_block(cfg, &pred.ops);
231                let tm = build_block(cfg, &map.ops);
232                branches.push(EdgeKind::Loop { target: tp, name: "filter" });
233                branches.push(EdgeKind::Loop { target: tm, name: "map" });
234                straight.push(op.clone());
235            }
236            Opcode::FilterFilter { p1, p2 } => {
237                let t1 = build_block(cfg, &p1.ops);
238                let t2 = build_block(cfg, &p2.ops);
239                branches.push(EdgeKind::Loop { target: t1, name: "filter1" });
240                branches.push(EdgeKind::Loop { target: t2, name: "filter2" });
241                straight.push(op.clone());
242            }
243            Opcode::MapMap { f1, f2 } => {
244                let t1 = build_block(cfg, &f1.ops);
245                let t2 = build_block(cfg, &f2.ops);
246                branches.push(EdgeKind::Loop { target: t1, name: "map1" });
247                branches.push(EdgeKind::Loop { target: t2, name: "map2" });
248                straight.push(op.clone());
249            }
250            Opcode::LetExpr { body, .. } => {
251                let t = build_block(cfg, &body.ops);
252                branches.push(EdgeKind::Bind { target: t });
253                straight.push(op.clone());
254            }
255            Opcode::ListComp(s) | Opcode::SetComp(s) => {
256                let te = build_block(cfg, &s.expr.ops);
257                let ti = build_block(cfg, &s.iter.ops);
258                branches.push(EdgeKind::Comp { target: te, part: CompPart::Expr });
259                branches.push(EdgeKind::Comp { target: ti, part: CompPart::Iter });
260                if let Some(c) = &s.cond {
261                    let tc = build_block(cfg, &c.ops);
262                    branches.push(EdgeKind::Comp { target: tc, part: CompPart::Cond });
263                }
264                straight.push(op.clone());
265            }
266            Opcode::DictComp(s) => {
267                let tk = build_block(cfg, &s.key.ops);
268                let tv = build_block(cfg, &s.val.ops);
269                let ti = build_block(cfg, &s.iter.ops);
270                branches.push(EdgeKind::Comp { target: tk, part: CompPart::Key });
271                branches.push(EdgeKind::Comp { target: tv, part: CompPart::Val });
272                branches.push(EdgeKind::Comp { target: ti, part: CompPart::Iter });
273                if let Some(c) = &s.cond {
274                    let tc = build_block(cfg, &c.ops);
275                    branches.push(EdgeKind::Comp { target: tc, part: CompPart::Cond });
276                }
277                straight.push(op.clone());
278            }
279            Opcode::CallMethod(c) | Opcode::CallOptMethod(c) => {
280                for p in c.sub_progs.iter() {
281                    let t = build_block(cfg, &p.ops);
282                    branches.push(EdgeKind::Loop { target: t, name: "method" });
283                }
284                straight.push(op.clone());
285            }
286            _ => straight.push(op.clone()),
287        }
288    }
289    cfg.blocks[id].ops = straight;
290    cfg.blocks[id].branches = branches;
291    id
292}
293
294// Silence unused-Arc warning.
295#[allow(dead_code)]
296fn _use_arc<T>(_: Arc<T>) {}
297
298// ── Liveness analysis ────────────────────────────────────────────────────────
299
300/// Per-block live-in / live-out sets of identifier names.
301#[derive(Debug, Clone, Default)]
302pub struct Liveness {
303    pub live_in:  Vec<HashSet<Arc<str>>>,
304    pub live_out: Vec<HashSet<Arc<str>>>,
305}
306
307impl Cfg {
308    /// Compute live-in / live-out sets for identifiers (variables) across
309    /// the CFG using standard backward dataflow.  A variable is live at a
310    /// point if a `LoadIdent` for it exists on some path to the exit.
311    pub fn liveness(&self) -> Liveness {
312        let n = self.blocks.len();
313        let mut live_in:  Vec<HashSet<Arc<str>>> = vec![HashSet::new(); n];
314        let mut live_out: Vec<HashSet<Arc<str>>> = vec![HashSet::new(); n];
315
316        // Per-block USE (read before any DEF) and DEF (written by LetExpr/BindVar).
317        let (usev, defv) = (0..n).map(|i| compute_use_def(&self.blocks[i].ops))
318            .fold((Vec::new(), Vec::new()), |(mut u, mut d), (bu, bd)| { u.push(bu); d.push(bd); (u, d) });
319
320        let mut changed = true;
321        while changed {
322            changed = false;
323            for b in 0..n {
324                // live_out[b] = U over successors s: live_in[s]
325                let mut new_out: HashSet<Arc<str>> = HashSet::new();
326                for e in &self.blocks[b].branches {
327                    let s = edge_target(e);
328                    new_out.extend(live_in[s].iter().cloned());
329                }
330                // live_in[b] = use[b] ∪ (live_out[b] − def[b])
331                let mut new_in = usev[b].clone();
332                for v in &new_out {
333                    if !defv[b].contains(v) { new_in.insert(v.clone()); }
334                }
335                if new_in != live_in[b]  { live_in[b]  = new_in;  changed = true; }
336                if new_out != live_out[b]{ live_out[b] = new_out; changed = true; }
337            }
338        }
339        Liveness { live_in, live_out }
340    }
341}
342
343// ── Live-range allocator ─────────────────────────────────────────────────────
344
345use std::collections::HashMap;
346
347/// Slot assignment for each let/bind-introduced name.
348#[derive(Debug, Clone, Default)]
349pub struct SlotMap {
350    pub slots: HashMap<Arc<str>, usize>,
351    pub count: usize,
352}
353
354impl Cfg {
355    /// Assign a compact slot index per ident using greedy graph colouring
356    /// over the interference graph derived from liveness.  Names live at
357    /// the same block interfere; greedy picks the lowest free slot.
358    pub fn allocate_slots(&self, live: &Liveness) -> SlotMap {
359        // Collect all defined names.
360        let mut all: Vec<Arc<str>> = Vec::new();
361        let mut seen: HashSet<Arc<str>> = HashSet::new();
362        for b in &self.blocks {
363            for op in &b.ops {
364                match op {
365                    Opcode::BindVar(n) | Opcode::StoreVar(n)
366                        | Opcode::LetExpr { name: n, .. } => {
367                        if seen.insert(n.clone()) { all.push(n.clone()); }
368                    }
369                    _ => {}
370                }
371            }
372        }
373        // Interference: a,b interfere if both in some live_in/live_out.
374        let mut interf: HashMap<Arc<str>, HashSet<Arc<str>>> = HashMap::new();
375        let add_edge = |a: &Arc<str>, b: &Arc<str>, m: &mut HashMap<Arc<str>, HashSet<Arc<str>>>| {
376            if a != b {
377                m.entry(a.clone()).or_default().insert(b.clone());
378                m.entry(b.clone()).or_default().insert(a.clone());
379            }
380        };
381        for s in live.live_in.iter().chain(live.live_out.iter()) {
382            let v: Vec<&Arc<str>> = s.iter().collect();
383            for i in 0..v.len() {
384                for j in (i+1)..v.len() { add_edge(v[i], v[j], &mut interf); }
385            }
386        }
387        // Greedy colouring.
388        let mut slots: HashMap<Arc<str>, usize> = HashMap::new();
389        let mut count = 0;
390        for name in &all {
391            let neighbours = interf.get(name).cloned().unwrap_or_default();
392            let used: HashSet<usize> = neighbours.iter()
393                .filter_map(|n| slots.get(n).copied()).collect();
394            let slot = (0..).find(|s| !used.contains(s)).unwrap();
395            if slot + 1 > count { count = slot + 1; }
396            slots.insert(name.clone(), slot);
397        }
398        SlotMap { slots, count }
399    }
400}
401
402fn compute_use_def(ops: &[Opcode]) -> (HashSet<Arc<str>>, HashSet<Arc<str>>) {
403    let mut use_set = HashSet::new();
404    let mut def_set = HashSet::new();
405    for op in ops {
406        match op {
407            Opcode::LoadIdent(n) => {
408                if !def_set.contains(n) { use_set.insert(n.clone()); }
409            }
410            Opcode::BindVar(n) | Opcode::StoreVar(n) | Opcode::LetExpr { name: n, .. } => {
411                def_set.insert(n.clone());
412            }
413            _ => {}
414        }
415    }
416    (use_set, def_set)
417}
418
419#[cfg(test)]
420mod tests {
421    use super::*;
422    use crate::vm::Compiler;
423
424    #[test]
425    fn cfg_linear_single_block() {
426        let p = Compiler::compile_str("1 + 2").unwrap();
427        let cfg = Cfg::build(&p);
428        assert_eq!(cfg.size(), 1);
429    }
430
431    #[test]
432    fn cfg_and_creates_branch() {
433        let p = Compiler::compile_str("$.a and $.b").unwrap();
434        let cfg = Cfg::build(&p);
435        assert!(cfg.size() >= 2, "AndOp should create child block");
436        let root = &cfg.blocks[cfg.entry];
437        assert!(root.branches.iter().any(|e| matches!(e,
438            EdgeKind::ShortCircuit { condition: Condition::IfTruthy, .. })));
439    }
440
441    #[test]
442    fn cfg_filter_creates_loop() {
443        let p = Compiler::compile_str("$.x.filter(@.a > 1)").unwrap();
444        let cfg = Cfg::build(&p);
445        assert!(cfg.size() >= 2);
446    }
447
448    #[test]
449    fn cfg_reachable_covers_all() {
450        let p = Compiler::compile_str("$.a.filter(@.x > 1).map(@.y)").unwrap();
451        let cfg = Cfg::build(&p);
452        let r = cfg.reachable();
453        assert_eq!(r.len(), cfg.size());
454    }
455
456    #[test]
457    fn cfg_liveness_tracks_let_body() {
458        let p = Compiler::compile_str("let x = $.a in x + x").unwrap();
459        let cfg = Cfg::build(&p);
460        let live = cfg.liveness();
461        // Body block should have x live-in.
462        let body_has_x = live.live_in.iter().any(|s|
463            s.iter().any(|n| n.as_ref() == "x"));
464        assert!(body_has_x, "x should be live inside let body");
465    }
466
467    #[test]
468    fn cfg_dominators_nonempty() {
469        let p = Compiler::compile_str("$.a and $.b").unwrap();
470        let cfg = Cfg::build(&p);
471        let doms = cfg.dominators();
472        assert_eq!(doms.len(), cfg.size());
473        // Entry dominates itself.
474        assert_eq!(doms[cfg.entry], Some(cfg.entry));
475    }
476
477    #[test]
478    fn cfg_dominance_frontiers_sized() {
479        let p = Compiler::compile_str("$.a and $.b").unwrap();
480        let cfg = Cfg::build(&p);
481        let df = cfg.dominance_frontiers();
482        assert_eq!(df.len(), cfg.size());
483    }
484
485    #[test]
486    fn cfg_slot_allocator_distinct() {
487        let p = Compiler::compile_str("let x = $.a in let y = x + 1 in y * 2").unwrap();
488        let cfg = Cfg::build(&p);
489        let live = cfg.liveness();
490        let slots = cfg.allocate_slots(&live);
491        assert!(slots.slots.contains_key("x"));
492        assert!(slots.slots.contains_key("y"));
493    }
494}