Skip to main content

jetro_core/
cfg.rs

1//! Control-flow graph for v2 programs.
2//!
3//! v2 opcodes are linear within a program, but sub-programs (inside
4//! `AndOp`, `OrOp`, `CoalesceOp`, `FilterMap`, etc.) introduce
5//! conditional / looped branches.  A `BasicBlock` holds a contiguous
6//! non-branching op slice; a `CFG` is a tree of blocks joined by
7//! typed edges.
8//!
9//! This is enough structure for classic analyses (liveness, dominators)
10//! to operate on — v2's restricted branch set means reducible CFGs only.
11
12use std::sync::Arc;
13use std::collections::HashSet;
14use super::vm::{Program, Opcode};
15
16/// A basic block: a straight-line opcode slice with no inner branches.
17/// `branches` lists the sub-program blocks reached at the block's end.
18#[derive(Debug, Clone)]
19pub struct BasicBlock {
20    pub id:       usize,
21    pub ops:      Vec<Opcode>,
22    pub branches: Vec<EdgeKind>,
23}
24
25/// Edge between blocks describing how control flows.
26#[derive(Debug, Clone)]
27pub enum EdgeKind {
28    /// Conditional short-circuit: `AndOp` / `OrOp` — right side executed
29    /// only when left is truthy / falsy respectively.
30    ShortCircuit { target: usize, condition: Condition },
31    /// Null-coalesce branch taken when left is null.
32    Coalesce { target: usize },
33    /// Per-element loop (filter / map body).
34    Loop { target: usize, name: &'static str },
35    /// Destructure / bind — always taken, introduces new scope.
36    Bind { target: usize },
37    /// Comprehension body.
38    Comp { target: usize, part: CompPart },
39}
40
41#[derive(Debug, Clone, Copy, PartialEq, Eq)]
42pub enum Condition { IfTruthy, IfFalsy, IfNull }
43
44#[derive(Debug, Clone, Copy, PartialEq, Eq)]
45pub enum CompPart { Expr, Iter, Cond, Key, Val }
46
47#[derive(Debug, Clone)]
48pub struct Cfg {
49    pub blocks: Vec<BasicBlock>,
50    pub entry:  usize,
51}
52
53impl Cfg {
54    /// Build a CFG from a compiled program.  Sub-programs become their
55    /// own blocks recursively; the returned `entry` is the root block id.
56    pub fn build(program: &Program) -> Cfg {
57        let mut cfg = Cfg { blocks: Vec::new(), entry: 0 };
58        cfg.entry = build_block(&mut cfg, &program.ops);
59        cfg
60    }
61
62    /// Number of basic blocks in the graph.
63    pub fn size(&self) -> usize { self.blocks.len() }
64
65    /// Reachable blocks from `entry` via BFS.
66    pub fn reachable(&self) -> Vec<usize> {
67        let mut visited = vec![false; self.blocks.len()];
68        let mut queue = vec![self.entry];
69        let mut out   = Vec::new();
70        while let Some(id) = queue.pop() {
71            if visited[id] { continue; }
72            visited[id] = true;
73            out.push(id);
74            for e in &self.blocks[id].branches {
75                let t = edge_target(e);
76                queue.push(t);
77            }
78        }
79        out
80    }
81
82    /// Compute immediate dominators by iterative dataflow (Cooper–Harvey–Kennedy).
83    pub fn dominators(&self) -> Vec<Option<usize>> {
84        let n = self.blocks.len();
85        let mut doms: Vec<Option<usize>> = vec![None; n];
86        if n == 0 { return doms; }
87        doms[self.entry] = Some(self.entry);
88        // Post-order traversal.
89        let order = self.reachable();
90        let mut changed = true;
91        while changed {
92            changed = false;
93            for &b in order.iter().rev() {
94                if b == self.entry { continue; }
95                // Find all predecessors.
96                let preds: Vec<usize> = (0..n).filter(|&p|
97                    self.blocks[p].branches.iter().any(|e| edge_target(e) == b)
98                ).collect();
99                let mut new_idom: Option<usize> = None;
100                for p in preds {
101                    if doms[p].is_none() { continue; }
102                    new_idom = Some(match new_idom {
103                        None => p,
104                        Some(cur) => intersect(&doms, p, cur),
105                    });
106                }
107                if doms[b] != new_idom {
108                    doms[b] = new_idom;
109                    changed = true;
110                }
111            }
112        }
113        doms
114    }
115
116    /// Predecessor list per block.
117    pub fn preds(&self) -> Vec<Vec<usize>> {
118        let n = self.blocks.len();
119        let mut p: Vec<Vec<usize>> = vec![Vec::new(); n];
120        for (bi, b) in self.blocks.iter().enumerate() {
121            for e in &b.branches { p[edge_target(e)].push(bi); }
122        }
123        p
124    }
125
126    /// Dominance frontier per block (Cytron et al.).
127    pub fn dominance_frontiers(&self) -> Vec<HashSet<usize>> {
128        let n = self.blocks.len();
129        let doms = self.dominators();
130        let preds = self.preds();
131        let mut df: Vec<HashSet<usize>> = vec![HashSet::new(); n];
132        for b in 0..n {
133            if preds[b].len() < 2 { continue; }
134            let Some(idom_b) = doms[b] else { continue };
135            for &p in &preds[b] {
136                let mut runner = p;
137                while Some(runner) != Some(idom_b) && doms[runner].is_some() {
138                    df[runner].insert(b);
139                    let next = doms[runner].unwrap();
140                    if next == runner { break; }
141                    runner = next;
142                }
143            }
144        }
145        df
146    }
147
148    /// Loop headers: blocks that dominate one of their own predecessors
149    /// (i.e. back-edges terminate here).  Returns (header, back_edge_source).
150    pub fn loop_headers(&self) -> Vec<(usize, usize)> {
151        let doms = self.dominators();
152        let preds = self.preds();
153        let mut out = Vec::new();
154        for (b, ps) in preds.iter().enumerate() {
155            for &p in ps {
156                if dominates(&doms, b, p) { out.push((b, p)); }
157            }
158        }
159        out
160    }
161}
162
163/// Does `a` dominate `b`?
164fn dominates(doms: &[Option<usize>], a: usize, mut b: usize) -> bool {
165    loop {
166        if a == b { return true; }
167        let Some(next) = doms[b] else { return false };
168        if next == b { return false; }
169        b = next;
170    }
171}
172
173fn edge_target(e: &EdgeKind) -> usize {
174    match e {
175        EdgeKind::ShortCircuit { target, .. }
176            | EdgeKind::Coalesce { target }
177            | EdgeKind::Loop { target, .. }
178            | EdgeKind::Bind { target }
179            | EdgeKind::Comp { target, .. } => *target,
180    }
181}
182
183fn intersect(doms: &[Option<usize>], mut a: usize, mut b: usize) -> usize {
184    while a != b {
185        while a > b { a = doms[a].unwrap_or(a); if a == doms[a].unwrap_or(a) && a != b { break; } }
186        while b > a { b = doms[b].unwrap_or(b); if b == doms[b].unwrap_or(b) && a != b { break; } }
187        if doms[a].map_or(true, |d| d == a) && doms[b].map_or(true, |d| d == b) { break; }
188    }
189    a
190}
191
192fn build_block(cfg: &mut Cfg, ops: &[Opcode]) -> usize {
193    let id = cfg.blocks.len();
194    cfg.blocks.push(BasicBlock { id, ops: Vec::new(), branches: Vec::new() });
195    let mut straight: Vec<Opcode> = Vec::new();
196    let mut branches: Vec<EdgeKind> = Vec::new();
197    for op in ops {
198        match op {
199            Opcode::AndOp(p) => {
200                let t = build_block(cfg, &p.ops);
201                branches.push(EdgeKind::ShortCircuit { target: t, condition: Condition::IfTruthy });
202                straight.push(op.clone());
203            }
204            Opcode::OrOp(p) => {
205                let t = build_block(cfg, &p.ops);
206                branches.push(EdgeKind::ShortCircuit { target: t, condition: Condition::IfFalsy });
207                straight.push(op.clone());
208            }
209            Opcode::CoalesceOp(p) => {
210                let t = build_block(cfg, &p.ops);
211                branches.push(EdgeKind::Coalesce { target: t });
212                straight.push(op.clone());
213            }
214            Opcode::InlineFilter(p) | Opcode::FilterCount(p)
215                | Opcode::FindFirst(p) | Opcode::FindOne(p)
216                | Opcode::MapSum(p) | Opcode::MapAvg(p)
217                | Opcode::MapFlatten(p)
218                | Opcode::MapFirst(p) | Opcode::MapLast(p)
219                | Opcode::FilterLast { pred: p } => {
220                let t = build_block(cfg, &p.ops);
221                branches.push(EdgeKind::Loop { target: t, name: "filter" });
222                straight.push(op.clone());
223            }
224            Opcode::FilterTakeWhile { pred, stop } => {
225                let tp = build_block(cfg, &pred.ops);
226                let ts = build_block(cfg, &stop.ops);
227                branches.push(EdgeKind::Loop { target: tp, name: "filter" });
228                branches.push(EdgeKind::Loop { target: ts, name: "stop" });
229                straight.push(op.clone());
230            }
231            Opcode::FilterMap { pred, map }
232                | Opcode::FilterMapSum { pred, map }
233                | Opcode::FilterMapAvg { pred, map }
234                | Opcode::FilterMapFirst { pred, map } => {
235                let tp = build_block(cfg, &pred.ops);
236                let tm = build_block(cfg, &map.ops);
237                branches.push(EdgeKind::Loop { target: tp, name: "filter" });
238                branches.push(EdgeKind::Loop { target: tm, name: "map" });
239                straight.push(op.clone());
240            }
241            Opcode::MapFilter { map, pred } => {
242                let tm = build_block(cfg, &map.ops);
243                let tp = build_block(cfg, &pred.ops);
244                branches.push(EdgeKind::Loop { target: tm, name: "map" });
245                branches.push(EdgeKind::Loop { target: tp, name: "filter" });
246                straight.push(op.clone());
247            }
248            Opcode::FilterFilter { p1, p2 } => {
249                let t1 = build_block(cfg, &p1.ops);
250                let t2 = build_block(cfg, &p2.ops);
251                branches.push(EdgeKind::Loop { target: t1, name: "filter1" });
252                branches.push(EdgeKind::Loop { target: t2, name: "filter2" });
253                straight.push(op.clone());
254            }
255            Opcode::MapMap { f1, f2 } => {
256                let t1 = build_block(cfg, &f1.ops);
257                let t2 = build_block(cfg, &f2.ops);
258                branches.push(EdgeKind::Loop { target: t1, name: "map1" });
259                branches.push(EdgeKind::Loop { target: t2, name: "map2" });
260                straight.push(op.clone());
261            }
262            Opcode::LetExpr { body, .. } => {
263                let t = build_block(cfg, &body.ops);
264                branches.push(EdgeKind::Bind { target: t });
265                straight.push(op.clone());
266            }
267            Opcode::ListComp(s) | Opcode::SetComp(s) => {
268                let te = build_block(cfg, &s.expr.ops);
269                let ti = build_block(cfg, &s.iter.ops);
270                branches.push(EdgeKind::Comp { target: te, part: CompPart::Expr });
271                branches.push(EdgeKind::Comp { target: ti, part: CompPart::Iter });
272                if let Some(c) = &s.cond {
273                    let tc = build_block(cfg, &c.ops);
274                    branches.push(EdgeKind::Comp { target: tc, part: CompPart::Cond });
275                }
276                straight.push(op.clone());
277            }
278            Opcode::DictComp(s) => {
279                let tk = build_block(cfg, &s.key.ops);
280                let tv = build_block(cfg, &s.val.ops);
281                let ti = build_block(cfg, &s.iter.ops);
282                branches.push(EdgeKind::Comp { target: tk, part: CompPart::Key });
283                branches.push(EdgeKind::Comp { target: tv, part: CompPart::Val });
284                branches.push(EdgeKind::Comp { target: ti, part: CompPart::Iter });
285                if let Some(c) = &s.cond {
286                    let tc = build_block(cfg, &c.ops);
287                    branches.push(EdgeKind::Comp { target: tc, part: CompPart::Cond });
288                }
289                straight.push(op.clone());
290            }
291            Opcode::CallMethod(c) | Opcode::CallOptMethod(c) => {
292                for p in c.sub_progs.iter() {
293                    let t = build_block(cfg, &p.ops);
294                    branches.push(EdgeKind::Loop { target: t, name: "method" });
295                }
296                straight.push(op.clone());
297            }
298            _ => straight.push(op.clone()),
299        }
300    }
301    cfg.blocks[id].ops = straight;
302    cfg.blocks[id].branches = branches;
303    id
304}
305
306// Silence unused-Arc warning.
307#[allow(dead_code)]
308fn _use_arc<T>(_: Arc<T>) {}
309
310// ── Liveness analysis ────────────────────────────────────────────────────────
311
312/// Per-block live-in / live-out sets of identifier names.
313#[derive(Debug, Clone, Default)]
314pub struct Liveness {
315    pub live_in:  Vec<HashSet<Arc<str>>>,
316    pub live_out: Vec<HashSet<Arc<str>>>,
317}
318
319impl Cfg {
320    /// Compute live-in / live-out sets for identifiers (variables) across
321    /// the CFG using standard backward dataflow.  A variable is live at a
322    /// point if a `LoadIdent` for it exists on some path to the exit.
323    pub fn liveness(&self) -> Liveness {
324        let n = self.blocks.len();
325        let mut live_in:  Vec<HashSet<Arc<str>>> = vec![HashSet::new(); n];
326        let mut live_out: Vec<HashSet<Arc<str>>> = vec![HashSet::new(); n];
327
328        // Per-block USE (read before any DEF) and DEF (written by LetExpr/BindVar).
329        let (usev, defv) = (0..n).map(|i| compute_use_def(&self.blocks[i].ops))
330            .fold((Vec::new(), Vec::new()), |(mut u, mut d), (bu, bd)| { u.push(bu); d.push(bd); (u, d) });
331
332        let mut changed = true;
333        while changed {
334            changed = false;
335            for b in 0..n {
336                // live_out[b] = U over successors s: live_in[s]
337                let mut new_out: HashSet<Arc<str>> = HashSet::new();
338                for e in &self.blocks[b].branches {
339                    let s = edge_target(e);
340                    new_out.extend(live_in[s].iter().cloned());
341                }
342                // live_in[b] = use[b] ∪ (live_out[b] − def[b])
343                let mut new_in = usev[b].clone();
344                for v in &new_out {
345                    if !defv[b].contains(v) { new_in.insert(v.clone()); }
346                }
347                if new_in != live_in[b]  { live_in[b]  = new_in;  changed = true; }
348                if new_out != live_out[b]{ live_out[b] = new_out; changed = true; }
349            }
350        }
351        Liveness { live_in, live_out }
352    }
353}
354
355// ── Live-range allocator ─────────────────────────────────────────────────────
356
357use std::collections::HashMap;
358
359/// Slot assignment for each let/bind-introduced name.
360#[derive(Debug, Clone, Default)]
361pub struct SlotMap {
362    pub slots: HashMap<Arc<str>, usize>,
363    pub count: usize,
364}
365
366impl Cfg {
367    /// Assign a compact slot index per ident using greedy graph colouring
368    /// over the interference graph derived from liveness.  Names live at
369    /// the same block interfere; greedy picks the lowest free slot.
370    pub fn allocate_slots(&self, live: &Liveness) -> SlotMap {
371        // Collect all defined names.
372        let mut all: Vec<Arc<str>> = Vec::new();
373        let mut seen: HashSet<Arc<str>> = HashSet::new();
374        for b in &self.blocks {
375            for op in &b.ops {
376                match op {
377                    Opcode::BindVar(n) | Opcode::StoreVar(n)
378                        | Opcode::LetExpr { name: n, .. } => {
379                        if seen.insert(n.clone()) { all.push(n.clone()); }
380                    }
381                    _ => {}
382                }
383            }
384        }
385        // Interference: a,b interfere if both in some live_in/live_out.
386        let mut interf: HashMap<Arc<str>, HashSet<Arc<str>>> = HashMap::new();
387        let add_edge = |a: &Arc<str>, b: &Arc<str>, m: &mut HashMap<Arc<str>, HashSet<Arc<str>>>| {
388            if a != b {
389                m.entry(a.clone()).or_default().insert(b.clone());
390                m.entry(b.clone()).or_default().insert(a.clone());
391            }
392        };
393        for s in live.live_in.iter().chain(live.live_out.iter()) {
394            let v: Vec<&Arc<str>> = s.iter().collect();
395            for i in 0..v.len() {
396                for j in (i+1)..v.len() { add_edge(v[i], v[j], &mut interf); }
397            }
398        }
399        // Greedy colouring.
400        let mut slots: HashMap<Arc<str>, usize> = HashMap::new();
401        let mut count = 0;
402        for name in &all {
403            let neighbours = interf.get(name).cloned().unwrap_or_default();
404            let used: HashSet<usize> = neighbours.iter()
405                .filter_map(|n| slots.get(n).copied()).collect();
406            let slot = (0..).find(|s| !used.contains(s)).unwrap();
407            if slot + 1 > count { count = slot + 1; }
408            slots.insert(name.clone(), slot);
409        }
410        SlotMap { slots, count }
411    }
412}
413
414fn compute_use_def(ops: &[Opcode]) -> (HashSet<Arc<str>>, HashSet<Arc<str>>) {
415    let mut use_set = HashSet::new();
416    let mut def_set = HashSet::new();
417    for op in ops {
418        match op {
419            Opcode::LoadIdent(n) => {
420                if !def_set.contains(n) { use_set.insert(n.clone()); }
421            }
422            Opcode::BindVar(n) | Opcode::StoreVar(n) | Opcode::LetExpr { name: n, .. } => {
423                def_set.insert(n.clone());
424            }
425            _ => {}
426        }
427    }
428    (use_set, def_set)
429}
430
431#[cfg(test)]
432mod tests {
433    use super::*;
434    use crate::vm::Compiler;
435
436    #[test]
437    fn cfg_linear_single_block() {
438        let p = Compiler::compile_str("1 + 2").unwrap();
439        let cfg = Cfg::build(&p);
440        assert_eq!(cfg.size(), 1);
441    }
442
443    #[test]
444    fn cfg_and_creates_branch() {
445        let p = Compiler::compile_str("$.a and $.b").unwrap();
446        let cfg = Cfg::build(&p);
447        assert!(cfg.size() >= 2, "AndOp should create child block");
448        let root = &cfg.blocks[cfg.entry];
449        assert!(root.branches.iter().any(|e| matches!(e,
450            EdgeKind::ShortCircuit { condition: Condition::IfTruthy, .. })));
451    }
452
453    #[test]
454    fn cfg_filter_creates_loop() {
455        // Use a predicate that doesn't match the field-predicate specialiser
456        // (`FilterFieldCmpLit`) so the generic filter opcode survives.
457        let p = Compiler::compile_str("$.x.filter(@.a > 1 and @.a < 10)").unwrap();
458        let cfg = Cfg::build(&p);
459        assert!(cfg.size() >= 2);
460    }
461
462    #[test]
463    fn cfg_reachable_covers_all() {
464        let p = Compiler::compile_str("$.a.filter(@.x > 1).map(@.y)").unwrap();
465        let cfg = Cfg::build(&p);
466        let r = cfg.reachable();
467        assert_eq!(r.len(), cfg.size());
468    }
469
470    #[test]
471    fn cfg_liveness_tracks_let_body() {
472        let p = Compiler::compile_str("let x = $.a in x + x").unwrap();
473        let cfg = Cfg::build(&p);
474        let live = cfg.liveness();
475        // Body block should have x live-in.
476        let body_has_x = live.live_in.iter().any(|s|
477            s.iter().any(|n| n.as_ref() == "x"));
478        assert!(body_has_x, "x should be live inside let body");
479    }
480
481    #[test]
482    fn cfg_dominators_nonempty() {
483        let p = Compiler::compile_str("$.a and $.b").unwrap();
484        let cfg = Cfg::build(&p);
485        let doms = cfg.dominators();
486        assert_eq!(doms.len(), cfg.size());
487        // Entry dominates itself.
488        assert_eq!(doms[cfg.entry], Some(cfg.entry));
489    }
490
491    #[test]
492    fn cfg_dominance_frontiers_sized() {
493        let p = Compiler::compile_str("$.a and $.b").unwrap();
494        let cfg = Cfg::build(&p);
495        let df = cfg.dominance_frontiers();
496        assert_eq!(df.len(), cfg.size());
497    }
498
499    #[test]
500    fn cfg_slot_allocator_distinct() {
501        let p = Compiler::compile_str("let x = $.a in let y = x + 1 in y * 2").unwrap();
502        let cfg = Cfg::build(&p);
503        let live = cfg.liveness();
504        let slots = cfg.allocate_slots(&live);
505        assert!(slots.slots.contains_key("x"));
506        assert!(slots.slots.contains_key("y"));
507    }
508}