1use std::sync::Arc;
13use std::collections::HashSet;
14use super::vm::{Program, Opcode};
15
16#[derive(Debug, Clone)]
19pub struct BasicBlock {
20 pub id: usize,
21 pub ops: Vec<Opcode>,
22 pub branches: Vec<EdgeKind>,
23}
24
25#[derive(Debug, Clone)]
27pub enum EdgeKind {
28 ShortCircuit { target: usize, condition: Condition },
31 Coalesce { target: usize },
33 Loop { target: usize, name: &'static str },
35 Bind { target: usize },
37 Comp { target: usize, part: CompPart },
39}
40
41#[derive(Debug, Clone, Copy, PartialEq, Eq)]
42pub enum Condition { IfTruthy, IfFalsy, IfNull }
43
44#[derive(Debug, Clone, Copy, PartialEq, Eq)]
45pub enum CompPart { Expr, Iter, Cond, Key, Val }
46
47#[derive(Debug, Clone)]
48pub struct Cfg {
49 pub blocks: Vec<BasicBlock>,
50 pub entry: usize,
51}
52
53impl Cfg {
54 pub fn build(program: &Program) -> Cfg {
57 let mut cfg = Cfg { blocks: Vec::new(), entry: 0 };
58 cfg.entry = build_block(&mut cfg, &program.ops);
59 cfg
60 }
61
62 pub fn size(&self) -> usize { self.blocks.len() }
64
65 pub fn reachable(&self) -> Vec<usize> {
67 let mut visited = vec![false; self.blocks.len()];
68 let mut queue = vec![self.entry];
69 let mut out = Vec::new();
70 while let Some(id) = queue.pop() {
71 if visited[id] { continue; }
72 visited[id] = true;
73 out.push(id);
74 for e in &self.blocks[id].branches {
75 let t = edge_target(e);
76 queue.push(t);
77 }
78 }
79 out
80 }
81
82 pub fn dominators(&self) -> Vec<Option<usize>> {
84 let n = self.blocks.len();
85 let mut doms: Vec<Option<usize>> = vec![None; n];
86 if n == 0 { return doms; }
87 doms[self.entry] = Some(self.entry);
88 let order = self.reachable();
90 let mut changed = true;
91 while changed {
92 changed = false;
93 for &b in order.iter().rev() {
94 if b == self.entry { continue; }
95 let preds: Vec<usize> = (0..n).filter(|&p|
97 self.blocks[p].branches.iter().any(|e| edge_target(e) == b)
98 ).collect();
99 let mut new_idom: Option<usize> = None;
100 for p in preds {
101 if doms[p].is_none() { continue; }
102 new_idom = Some(match new_idom {
103 None => p,
104 Some(cur) => intersect(&doms, p, cur),
105 });
106 }
107 if doms[b] != new_idom {
108 doms[b] = new_idom;
109 changed = true;
110 }
111 }
112 }
113 doms
114 }
115
116 pub fn preds(&self) -> Vec<Vec<usize>> {
118 let n = self.blocks.len();
119 let mut p: Vec<Vec<usize>> = vec![Vec::new(); n];
120 for (bi, b) in self.blocks.iter().enumerate() {
121 for e in &b.branches { p[edge_target(e)].push(bi); }
122 }
123 p
124 }
125
126 pub fn dominance_frontiers(&self) -> Vec<HashSet<usize>> {
128 let n = self.blocks.len();
129 let doms = self.dominators();
130 let preds = self.preds();
131 let mut df: Vec<HashSet<usize>> = vec![HashSet::new(); n];
132 for b in 0..n {
133 if preds[b].len() < 2 { continue; }
134 let Some(idom_b) = doms[b] else { continue };
135 for &p in &preds[b] {
136 let mut runner = p;
137 while Some(runner) != Some(idom_b) && doms[runner].is_some() {
138 df[runner].insert(b);
139 let next = doms[runner].unwrap();
140 if next == runner { break; }
141 runner = next;
142 }
143 }
144 }
145 df
146 }
147
148 pub fn loop_headers(&self) -> Vec<(usize, usize)> {
151 let doms = self.dominators();
152 let preds = self.preds();
153 let mut out = Vec::new();
154 for (b, ps) in preds.iter().enumerate() {
155 for &p in ps {
156 if dominates(&doms, b, p) { out.push((b, p)); }
157 }
158 }
159 out
160 }
161}
162
163fn dominates(doms: &[Option<usize>], a: usize, mut b: usize) -> bool {
165 loop {
166 if a == b { return true; }
167 let Some(next) = doms[b] else { return false };
168 if next == b { return false; }
169 b = next;
170 }
171}
172
173fn edge_target(e: &EdgeKind) -> usize {
174 match e {
175 EdgeKind::ShortCircuit { target, .. }
176 | EdgeKind::Coalesce { target }
177 | EdgeKind::Loop { target, .. }
178 | EdgeKind::Bind { target }
179 | EdgeKind::Comp { target, .. } => *target,
180 }
181}
182
183fn intersect(doms: &[Option<usize>], mut a: usize, mut b: usize) -> usize {
184 while a != b {
185 while a > b { a = doms[a].unwrap_or(a); if a == doms[a].unwrap_or(a) && a != b { break; } }
186 while b > a { b = doms[b].unwrap_or(b); if b == doms[b].unwrap_or(b) && a != b { break; } }
187 if doms[a].map_or(true, |d| d == a) && doms[b].map_or(true, |d| d == b) { break; }
188 }
189 a
190}
191
192fn build_block(cfg: &mut Cfg, ops: &[Opcode]) -> usize {
193 let id = cfg.blocks.len();
194 cfg.blocks.push(BasicBlock { id, ops: Vec::new(), branches: Vec::new() });
195 let mut straight: Vec<Opcode> = Vec::new();
196 let mut branches: Vec<EdgeKind> = Vec::new();
197 for op in ops {
198 match op {
199 Opcode::AndOp(p) => {
200 let t = build_block(cfg, &p.ops);
201 branches.push(EdgeKind::ShortCircuit { target: t, condition: Condition::IfTruthy });
202 straight.push(op.clone());
203 }
204 Opcode::OrOp(p) => {
205 let t = build_block(cfg, &p.ops);
206 branches.push(EdgeKind::ShortCircuit { target: t, condition: Condition::IfFalsy });
207 straight.push(op.clone());
208 }
209 Opcode::CoalesceOp(p) => {
210 let t = build_block(cfg, &p.ops);
211 branches.push(EdgeKind::Coalesce { target: t });
212 straight.push(op.clone());
213 }
214 Opcode::InlineFilter(p) | Opcode::FilterCount(p)
215 | Opcode::FindFirst(p) | Opcode::FindOne(p)
216 | Opcode::MapSum(p) | Opcode::MapAvg(p)
217 | Opcode::MapFlatten(p)
218 | Opcode::MapFirst(p) | Opcode::MapLast(p)
219 | Opcode::FilterLast { pred: p } => {
220 let t = build_block(cfg, &p.ops);
221 branches.push(EdgeKind::Loop { target: t, name: "filter" });
222 straight.push(op.clone());
223 }
224 Opcode::FilterTakeWhile { pred, stop } => {
225 let tp = build_block(cfg, &pred.ops);
226 let ts = build_block(cfg, &stop.ops);
227 branches.push(EdgeKind::Loop { target: tp, name: "filter" });
228 branches.push(EdgeKind::Loop { target: ts, name: "stop" });
229 straight.push(op.clone());
230 }
231 Opcode::FilterMap { pred, map }
232 | Opcode::FilterMapSum { pred, map }
233 | Opcode::FilterMapAvg { pred, map }
234 | Opcode::FilterMapFirst { pred, map } => {
235 let tp = build_block(cfg, &pred.ops);
236 let tm = build_block(cfg, &map.ops);
237 branches.push(EdgeKind::Loop { target: tp, name: "filter" });
238 branches.push(EdgeKind::Loop { target: tm, name: "map" });
239 straight.push(op.clone());
240 }
241 Opcode::MapFilter { map, pred } => {
242 let tm = build_block(cfg, &map.ops);
243 let tp = build_block(cfg, &pred.ops);
244 branches.push(EdgeKind::Loop { target: tm, name: "map" });
245 branches.push(EdgeKind::Loop { target: tp, name: "filter" });
246 straight.push(op.clone());
247 }
248 Opcode::FilterFilter { p1, p2 } => {
249 let t1 = build_block(cfg, &p1.ops);
250 let t2 = build_block(cfg, &p2.ops);
251 branches.push(EdgeKind::Loop { target: t1, name: "filter1" });
252 branches.push(EdgeKind::Loop { target: t2, name: "filter2" });
253 straight.push(op.clone());
254 }
255 Opcode::MapMap { f1, f2 } => {
256 let t1 = build_block(cfg, &f1.ops);
257 let t2 = build_block(cfg, &f2.ops);
258 branches.push(EdgeKind::Loop { target: t1, name: "map1" });
259 branches.push(EdgeKind::Loop { target: t2, name: "map2" });
260 straight.push(op.clone());
261 }
262 Opcode::LetExpr { body, .. } => {
263 let t = build_block(cfg, &body.ops);
264 branches.push(EdgeKind::Bind { target: t });
265 straight.push(op.clone());
266 }
267 Opcode::ListComp(s) | Opcode::SetComp(s) => {
268 let te = build_block(cfg, &s.expr.ops);
269 let ti = build_block(cfg, &s.iter.ops);
270 branches.push(EdgeKind::Comp { target: te, part: CompPart::Expr });
271 branches.push(EdgeKind::Comp { target: ti, part: CompPart::Iter });
272 if let Some(c) = &s.cond {
273 let tc = build_block(cfg, &c.ops);
274 branches.push(EdgeKind::Comp { target: tc, part: CompPart::Cond });
275 }
276 straight.push(op.clone());
277 }
278 Opcode::DictComp(s) => {
279 let tk = build_block(cfg, &s.key.ops);
280 let tv = build_block(cfg, &s.val.ops);
281 let ti = build_block(cfg, &s.iter.ops);
282 branches.push(EdgeKind::Comp { target: tk, part: CompPart::Key });
283 branches.push(EdgeKind::Comp { target: tv, part: CompPart::Val });
284 branches.push(EdgeKind::Comp { target: ti, part: CompPart::Iter });
285 if let Some(c) = &s.cond {
286 let tc = build_block(cfg, &c.ops);
287 branches.push(EdgeKind::Comp { target: tc, part: CompPart::Cond });
288 }
289 straight.push(op.clone());
290 }
291 Opcode::CallMethod(c) | Opcode::CallOptMethod(c) => {
292 for p in c.sub_progs.iter() {
293 let t = build_block(cfg, &p.ops);
294 branches.push(EdgeKind::Loop { target: t, name: "method" });
295 }
296 straight.push(op.clone());
297 }
298 _ => straight.push(op.clone()),
299 }
300 }
301 cfg.blocks[id].ops = straight;
302 cfg.blocks[id].branches = branches;
303 id
304}
305
306#[allow(dead_code)]
308fn _use_arc<T>(_: Arc<T>) {}
309
310#[derive(Debug, Clone, Default)]
314pub struct Liveness {
315 pub live_in: Vec<HashSet<Arc<str>>>,
316 pub live_out: Vec<HashSet<Arc<str>>>,
317}
318
319impl Cfg {
320 pub fn liveness(&self) -> Liveness {
324 let n = self.blocks.len();
325 let mut live_in: Vec<HashSet<Arc<str>>> = vec![HashSet::new(); n];
326 let mut live_out: Vec<HashSet<Arc<str>>> = vec![HashSet::new(); n];
327
328 let (usev, defv) = (0..n).map(|i| compute_use_def(&self.blocks[i].ops))
330 .fold((Vec::new(), Vec::new()), |(mut u, mut d), (bu, bd)| { u.push(bu); d.push(bd); (u, d) });
331
332 let mut changed = true;
333 while changed {
334 changed = false;
335 for b in 0..n {
336 let mut new_out: HashSet<Arc<str>> = HashSet::new();
338 for e in &self.blocks[b].branches {
339 let s = edge_target(e);
340 new_out.extend(live_in[s].iter().cloned());
341 }
342 let mut new_in = usev[b].clone();
344 for v in &new_out {
345 if !defv[b].contains(v) { new_in.insert(v.clone()); }
346 }
347 if new_in != live_in[b] { live_in[b] = new_in; changed = true; }
348 if new_out != live_out[b]{ live_out[b] = new_out; changed = true; }
349 }
350 }
351 Liveness { live_in, live_out }
352 }
353}
354
355use std::collections::HashMap;
358
359#[derive(Debug, Clone, Default)]
361pub struct SlotMap {
362 pub slots: HashMap<Arc<str>, usize>,
363 pub count: usize,
364}
365
366impl Cfg {
367 pub fn allocate_slots(&self, live: &Liveness) -> SlotMap {
371 let mut all: Vec<Arc<str>> = Vec::new();
373 let mut seen: HashSet<Arc<str>> = HashSet::new();
374 for b in &self.blocks {
375 for op in &b.ops {
376 match op {
377 Opcode::BindVar(n) | Opcode::StoreVar(n)
378 | Opcode::LetExpr { name: n, .. } => {
379 if seen.insert(n.clone()) { all.push(n.clone()); }
380 }
381 _ => {}
382 }
383 }
384 }
385 let mut interf: HashMap<Arc<str>, HashSet<Arc<str>>> = HashMap::new();
387 let add_edge = |a: &Arc<str>, b: &Arc<str>, m: &mut HashMap<Arc<str>, HashSet<Arc<str>>>| {
388 if a != b {
389 m.entry(a.clone()).or_default().insert(b.clone());
390 m.entry(b.clone()).or_default().insert(a.clone());
391 }
392 };
393 for s in live.live_in.iter().chain(live.live_out.iter()) {
394 let v: Vec<&Arc<str>> = s.iter().collect();
395 for i in 0..v.len() {
396 for j in (i+1)..v.len() { add_edge(v[i], v[j], &mut interf); }
397 }
398 }
399 let mut slots: HashMap<Arc<str>, usize> = HashMap::new();
401 let mut count = 0;
402 for name in &all {
403 let neighbours = interf.get(name).cloned().unwrap_or_default();
404 let used: HashSet<usize> = neighbours.iter()
405 .filter_map(|n| slots.get(n).copied()).collect();
406 let slot = (0..).find(|s| !used.contains(s)).unwrap();
407 if slot + 1 > count { count = slot + 1; }
408 slots.insert(name.clone(), slot);
409 }
410 SlotMap { slots, count }
411 }
412}
413
414fn compute_use_def(ops: &[Opcode]) -> (HashSet<Arc<str>>, HashSet<Arc<str>>) {
415 let mut use_set = HashSet::new();
416 let mut def_set = HashSet::new();
417 for op in ops {
418 match op {
419 Opcode::LoadIdent(n) => {
420 if !def_set.contains(n) { use_set.insert(n.clone()); }
421 }
422 Opcode::BindVar(n) | Opcode::StoreVar(n) | Opcode::LetExpr { name: n, .. } => {
423 def_set.insert(n.clone());
424 }
425 _ => {}
426 }
427 }
428 (use_set, def_set)
429}
430
431#[cfg(test)]
432mod tests {
433 use super::*;
434 use crate::vm::Compiler;
435
436 #[test]
437 fn cfg_linear_single_block() {
438 let p = Compiler::compile_str("1 + 2").unwrap();
439 let cfg = Cfg::build(&p);
440 assert_eq!(cfg.size(), 1);
441 }
442
443 #[test]
444 fn cfg_and_creates_branch() {
445 let p = Compiler::compile_str("$.a and $.b").unwrap();
446 let cfg = Cfg::build(&p);
447 assert!(cfg.size() >= 2, "AndOp should create child block");
448 let root = &cfg.blocks[cfg.entry];
449 assert!(root.branches.iter().any(|e| matches!(e,
450 EdgeKind::ShortCircuit { condition: Condition::IfTruthy, .. })));
451 }
452
453 #[test]
454 fn cfg_filter_creates_loop() {
455 let p = Compiler::compile_str("$.x.filter(@.a > 1 and @.a < 10)").unwrap();
458 let cfg = Cfg::build(&p);
459 assert!(cfg.size() >= 2);
460 }
461
462 #[test]
463 fn cfg_reachable_covers_all() {
464 let p = Compiler::compile_str("$.a.filter(@.x > 1).map(@.y)").unwrap();
465 let cfg = Cfg::build(&p);
466 let r = cfg.reachable();
467 assert_eq!(r.len(), cfg.size());
468 }
469
470 #[test]
471 fn cfg_liveness_tracks_let_body() {
472 let p = Compiler::compile_str("let x = $.a in x + x").unwrap();
473 let cfg = Cfg::build(&p);
474 let live = cfg.liveness();
475 let body_has_x = live.live_in.iter().any(|s|
477 s.iter().any(|n| n.as_ref() == "x"));
478 assert!(body_has_x, "x should be live inside let body");
479 }
480
481 #[test]
482 fn cfg_dominators_nonempty() {
483 let p = Compiler::compile_str("$.a and $.b").unwrap();
484 let cfg = Cfg::build(&p);
485 let doms = cfg.dominators();
486 assert_eq!(doms.len(), cfg.size());
487 assert_eq!(doms[cfg.entry], Some(cfg.entry));
489 }
490
491 #[test]
492 fn cfg_dominance_frontiers_sized() {
493 let p = Compiler::compile_str("$.a and $.b").unwrap();
494 let cfg = Cfg::build(&p);
495 let df = cfg.dominance_frontiers();
496 assert_eq!(df.len(), cfg.size());
497 }
498
499 #[test]
500 fn cfg_slot_allocator_distinct() {
501 let p = Compiler::compile_str("let x = $.a in let y = x + 1 in y * 2").unwrap();
502 let cfg = Cfg::build(&p);
503 let live = cfg.liveness();
504 let slots = cfg.allocate_slots(&live);
505 assert!(slots.slots.contains_key("x"));
506 assert!(slots.slots.contains_key("y"));
507 }
508}