1use std::sync::Arc;
13use std::collections::HashSet;
14use super::vm::{Program, Opcode};
15
16#[derive(Debug, Clone)]
19pub struct BasicBlock {
20 pub id: usize,
21 pub ops: Vec<Opcode>,
22 pub branches: Vec<EdgeKind>,
23}
24
25#[derive(Debug, Clone)]
27pub enum EdgeKind {
28 ShortCircuit { target: usize, condition: Condition },
31 Coalesce { target: usize },
33 Loop { target: usize, name: &'static str },
35 Bind { target: usize },
37 Comp { target: usize, part: CompPart },
39}
40
41#[derive(Debug, Clone, Copy, PartialEq, Eq)]
42pub enum Condition { IfTruthy, IfFalsy, IfNull }
43
44#[derive(Debug, Clone, Copy, PartialEq, Eq)]
45pub enum CompPart { Expr, Iter, Cond, Key, Val }
46
47#[derive(Debug, Clone)]
48pub struct Cfg {
49 pub blocks: Vec<BasicBlock>,
50 pub entry: usize,
51}
52
53impl Cfg {
54 pub fn build(program: &Program) -> Cfg {
57 let mut cfg = Cfg { blocks: Vec::new(), entry: 0 };
58 cfg.entry = build_block(&mut cfg, &program.ops);
59 cfg
60 }
61
62 pub fn size(&self) -> usize { self.blocks.len() }
64
65 pub fn reachable(&self) -> Vec<usize> {
67 let mut visited = vec![false; self.blocks.len()];
68 let mut queue = vec![self.entry];
69 let mut out = Vec::new();
70 while let Some(id) = queue.pop() {
71 if visited[id] { continue; }
72 visited[id] = true;
73 out.push(id);
74 for e in &self.blocks[id].branches {
75 let t = edge_target(e);
76 queue.push(t);
77 }
78 }
79 out
80 }
81
82 pub fn dominators(&self) -> Vec<Option<usize>> {
84 let n = self.blocks.len();
85 let mut doms: Vec<Option<usize>> = vec![None; n];
86 if n == 0 { return doms; }
87 doms[self.entry] = Some(self.entry);
88 let order = self.reachable();
90 let mut changed = true;
91 while changed {
92 changed = false;
93 for &b in order.iter().rev() {
94 if b == self.entry { continue; }
95 let preds: Vec<usize> = (0..n).filter(|&p|
97 self.blocks[p].branches.iter().any(|e| edge_target(e) == b)
98 ).collect();
99 let mut new_idom: Option<usize> = None;
100 for p in preds {
101 if doms[p].is_none() { continue; }
102 new_idom = Some(match new_idom {
103 None => p,
104 Some(cur) => intersect(&doms, p, cur),
105 });
106 }
107 if doms[b] != new_idom {
108 doms[b] = new_idom;
109 changed = true;
110 }
111 }
112 }
113 doms
114 }
115
116 pub fn preds(&self) -> Vec<Vec<usize>> {
118 let n = self.blocks.len();
119 let mut p: Vec<Vec<usize>> = vec![Vec::new(); n];
120 for (bi, b) in self.blocks.iter().enumerate() {
121 for e in &b.branches { p[edge_target(e)].push(bi); }
122 }
123 p
124 }
125
126 pub fn dominance_frontiers(&self) -> Vec<HashSet<usize>> {
128 let n = self.blocks.len();
129 let doms = self.dominators();
130 let preds = self.preds();
131 let mut df: Vec<HashSet<usize>> = vec![HashSet::new(); n];
132 for b in 0..n {
133 if preds[b].len() < 2 { continue; }
134 let Some(idom_b) = doms[b] else { continue };
135 for &p in &preds[b] {
136 let mut runner = p;
137 while Some(runner) != Some(idom_b) && doms[runner].is_some() {
138 df[runner].insert(b);
139 let next = doms[runner].unwrap();
140 if next == runner { break; }
141 runner = next;
142 }
143 }
144 }
145 df
146 }
147
148 pub fn loop_headers(&self) -> Vec<(usize, usize)> {
151 let doms = self.dominators();
152 let preds = self.preds();
153 let mut out = Vec::new();
154 for (b, ps) in preds.iter().enumerate() {
155 for &p in ps {
156 if dominates(&doms, b, p) { out.push((b, p)); }
157 }
158 }
159 out
160 }
161}
162
163fn dominates(doms: &[Option<usize>], a: usize, mut b: usize) -> bool {
165 loop {
166 if a == b { return true; }
167 let Some(next) = doms[b] else { return false };
168 if next == b { return false; }
169 b = next;
170 }
171}
172
173fn edge_target(e: &EdgeKind) -> usize {
174 match e {
175 EdgeKind::ShortCircuit { target, .. }
176 | EdgeKind::Coalesce { target }
177 | EdgeKind::Loop { target, .. }
178 | EdgeKind::Bind { target }
179 | EdgeKind::Comp { target, .. } => *target,
180 }
181}
182
183fn intersect(doms: &[Option<usize>], mut a: usize, mut b: usize) -> usize {
184 while a != b {
185 while a > b { a = doms[a].unwrap_or(a); if a == doms[a].unwrap_or(a) && a != b { break; } }
186 while b > a { b = doms[b].unwrap_or(b); if b == doms[b].unwrap_or(b) && a != b { break; } }
187 if doms[a].map_or(true, |d| d == a) && doms[b].map_or(true, |d| d == b) { break; }
188 }
189 a
190}
191
192fn build_block(cfg: &mut Cfg, ops: &[Opcode]) -> usize {
193 let id = cfg.blocks.len();
194 cfg.blocks.push(BasicBlock { id, ops: Vec::new(), branches: Vec::new() });
195 let mut straight: Vec<Opcode> = Vec::new();
196 let mut branches: Vec<EdgeKind> = Vec::new();
197 for op in ops {
198 match op {
199 Opcode::AndOp(p) => {
200 let t = build_block(cfg, &p.ops);
201 branches.push(EdgeKind::ShortCircuit { target: t, condition: Condition::IfTruthy });
202 straight.push(op.clone());
203 }
204 Opcode::OrOp(p) => {
205 let t = build_block(cfg, &p.ops);
206 branches.push(EdgeKind::ShortCircuit { target: t, condition: Condition::IfFalsy });
207 straight.push(op.clone());
208 }
209 Opcode::CoalesceOp(p) => {
210 let t = build_block(cfg, &p.ops);
211 branches.push(EdgeKind::Coalesce { target: t });
212 straight.push(op.clone());
213 }
214 Opcode::InlineFilter(p) | Opcode::FilterCount(p)
215 | Opcode::FindFirst(p) | Opcode::FindOne(p)
216 | Opcode::MapSum(p) | Opcode::MapAvg(p)
217 | Opcode::MapFlatten(p) => {
218 let t = build_block(cfg, &p.ops);
219 branches.push(EdgeKind::Loop { target: t, name: "filter" });
220 straight.push(op.clone());
221 }
222 Opcode::FilterTakeWhile { pred, stop } => {
223 let tp = build_block(cfg, &pred.ops);
224 let ts = build_block(cfg, &stop.ops);
225 branches.push(EdgeKind::Loop { target: tp, name: "filter" });
226 branches.push(EdgeKind::Loop { target: ts, name: "stop" });
227 straight.push(op.clone());
228 }
229 Opcode::FilterMap { pred, map } => {
230 let tp = build_block(cfg, &pred.ops);
231 let tm = build_block(cfg, &map.ops);
232 branches.push(EdgeKind::Loop { target: tp, name: "filter" });
233 branches.push(EdgeKind::Loop { target: tm, name: "map" });
234 straight.push(op.clone());
235 }
236 Opcode::FilterFilter { p1, p2 } => {
237 let t1 = build_block(cfg, &p1.ops);
238 let t2 = build_block(cfg, &p2.ops);
239 branches.push(EdgeKind::Loop { target: t1, name: "filter1" });
240 branches.push(EdgeKind::Loop { target: t2, name: "filter2" });
241 straight.push(op.clone());
242 }
243 Opcode::MapMap { f1, f2 } => {
244 let t1 = build_block(cfg, &f1.ops);
245 let t2 = build_block(cfg, &f2.ops);
246 branches.push(EdgeKind::Loop { target: t1, name: "map1" });
247 branches.push(EdgeKind::Loop { target: t2, name: "map2" });
248 straight.push(op.clone());
249 }
250 Opcode::LetExpr { body, .. } => {
251 let t = build_block(cfg, &body.ops);
252 branches.push(EdgeKind::Bind { target: t });
253 straight.push(op.clone());
254 }
255 Opcode::ListComp(s) | Opcode::SetComp(s) => {
256 let te = build_block(cfg, &s.expr.ops);
257 let ti = build_block(cfg, &s.iter.ops);
258 branches.push(EdgeKind::Comp { target: te, part: CompPart::Expr });
259 branches.push(EdgeKind::Comp { target: ti, part: CompPart::Iter });
260 if let Some(c) = &s.cond {
261 let tc = build_block(cfg, &c.ops);
262 branches.push(EdgeKind::Comp { target: tc, part: CompPart::Cond });
263 }
264 straight.push(op.clone());
265 }
266 Opcode::DictComp(s) => {
267 let tk = build_block(cfg, &s.key.ops);
268 let tv = build_block(cfg, &s.val.ops);
269 let ti = build_block(cfg, &s.iter.ops);
270 branches.push(EdgeKind::Comp { target: tk, part: CompPart::Key });
271 branches.push(EdgeKind::Comp { target: tv, part: CompPart::Val });
272 branches.push(EdgeKind::Comp { target: ti, part: CompPart::Iter });
273 if let Some(c) = &s.cond {
274 let tc = build_block(cfg, &c.ops);
275 branches.push(EdgeKind::Comp { target: tc, part: CompPart::Cond });
276 }
277 straight.push(op.clone());
278 }
279 Opcode::CallMethod(c) | Opcode::CallOptMethod(c) => {
280 for p in c.sub_progs.iter() {
281 let t = build_block(cfg, &p.ops);
282 branches.push(EdgeKind::Loop { target: t, name: "method" });
283 }
284 straight.push(op.clone());
285 }
286 _ => straight.push(op.clone()),
287 }
288 }
289 cfg.blocks[id].ops = straight;
290 cfg.blocks[id].branches = branches;
291 id
292}
293
294#[allow(dead_code)]
296fn _use_arc<T>(_: Arc<T>) {}
297
298#[derive(Debug, Clone, Default)]
302pub struct Liveness {
303 pub live_in: Vec<HashSet<Arc<str>>>,
304 pub live_out: Vec<HashSet<Arc<str>>>,
305}
306
307impl Cfg {
308 pub fn liveness(&self) -> Liveness {
312 let n = self.blocks.len();
313 let mut live_in: Vec<HashSet<Arc<str>>> = vec![HashSet::new(); n];
314 let mut live_out: Vec<HashSet<Arc<str>>> = vec![HashSet::new(); n];
315
316 let (usev, defv) = (0..n).map(|i| compute_use_def(&self.blocks[i].ops))
318 .fold((Vec::new(), Vec::new()), |(mut u, mut d), (bu, bd)| { u.push(bu); d.push(bd); (u, d) });
319
320 let mut changed = true;
321 while changed {
322 changed = false;
323 for b in 0..n {
324 let mut new_out: HashSet<Arc<str>> = HashSet::new();
326 for e in &self.blocks[b].branches {
327 let s = edge_target(e);
328 new_out.extend(live_in[s].iter().cloned());
329 }
330 let mut new_in = usev[b].clone();
332 for v in &new_out {
333 if !defv[b].contains(v) { new_in.insert(v.clone()); }
334 }
335 if new_in != live_in[b] { live_in[b] = new_in; changed = true; }
336 if new_out != live_out[b]{ live_out[b] = new_out; changed = true; }
337 }
338 }
339 Liveness { live_in, live_out }
340 }
341}
342
343use std::collections::HashMap;
346
347#[derive(Debug, Clone, Default)]
349pub struct SlotMap {
350 pub slots: HashMap<Arc<str>, usize>,
351 pub count: usize,
352}
353
354impl Cfg {
355 pub fn allocate_slots(&self, live: &Liveness) -> SlotMap {
359 let mut all: Vec<Arc<str>> = Vec::new();
361 let mut seen: HashSet<Arc<str>> = HashSet::new();
362 for b in &self.blocks {
363 for op in &b.ops {
364 match op {
365 Opcode::BindVar(n) | Opcode::StoreVar(n)
366 | Opcode::LetExpr { name: n, .. } => {
367 if seen.insert(n.clone()) { all.push(n.clone()); }
368 }
369 _ => {}
370 }
371 }
372 }
373 let mut interf: HashMap<Arc<str>, HashSet<Arc<str>>> = HashMap::new();
375 let add_edge = |a: &Arc<str>, b: &Arc<str>, m: &mut HashMap<Arc<str>, HashSet<Arc<str>>>| {
376 if a != b {
377 m.entry(a.clone()).or_default().insert(b.clone());
378 m.entry(b.clone()).or_default().insert(a.clone());
379 }
380 };
381 for s in live.live_in.iter().chain(live.live_out.iter()) {
382 let v: Vec<&Arc<str>> = s.iter().collect();
383 for i in 0..v.len() {
384 for j in (i+1)..v.len() { add_edge(v[i], v[j], &mut interf); }
385 }
386 }
387 let mut slots: HashMap<Arc<str>, usize> = HashMap::new();
389 let mut count = 0;
390 for name in &all {
391 let neighbours = interf.get(name).cloned().unwrap_or_default();
392 let used: HashSet<usize> = neighbours.iter()
393 .filter_map(|n| slots.get(n).copied()).collect();
394 let slot = (0..).find(|s| !used.contains(s)).unwrap();
395 if slot + 1 > count { count = slot + 1; }
396 slots.insert(name.clone(), slot);
397 }
398 SlotMap { slots, count }
399 }
400}
401
402fn compute_use_def(ops: &[Opcode]) -> (HashSet<Arc<str>>, HashSet<Arc<str>>) {
403 let mut use_set = HashSet::new();
404 let mut def_set = HashSet::new();
405 for op in ops {
406 match op {
407 Opcode::LoadIdent(n) => {
408 if !def_set.contains(n) { use_set.insert(n.clone()); }
409 }
410 Opcode::BindVar(n) | Opcode::StoreVar(n) | Opcode::LetExpr { name: n, .. } => {
411 def_set.insert(n.clone());
412 }
413 _ => {}
414 }
415 }
416 (use_set, def_set)
417}
418
419#[cfg(test)]
420mod tests {
421 use super::*;
422 use crate::vm::Compiler;
423
424 #[test]
425 fn cfg_linear_single_block() {
426 let p = Compiler::compile_str("1 + 2").unwrap();
427 let cfg = Cfg::build(&p);
428 assert_eq!(cfg.size(), 1);
429 }
430
431 #[test]
432 fn cfg_and_creates_branch() {
433 let p = Compiler::compile_str("$.a and $.b").unwrap();
434 let cfg = Cfg::build(&p);
435 assert!(cfg.size() >= 2, "AndOp should create child block");
436 let root = &cfg.blocks[cfg.entry];
437 assert!(root.branches.iter().any(|e| matches!(e,
438 EdgeKind::ShortCircuit { condition: Condition::IfTruthy, .. })));
439 }
440
441 #[test]
442 fn cfg_filter_creates_loop() {
443 let p = Compiler::compile_str("$.x.filter(@.a > 1)").unwrap();
444 let cfg = Cfg::build(&p);
445 assert!(cfg.size() >= 2);
446 }
447
448 #[test]
449 fn cfg_reachable_covers_all() {
450 let p = Compiler::compile_str("$.a.filter(@.x > 1).map(@.y)").unwrap();
451 let cfg = Cfg::build(&p);
452 let r = cfg.reachable();
453 assert_eq!(r.len(), cfg.size());
454 }
455
456 #[test]
457 fn cfg_liveness_tracks_let_body() {
458 let p = Compiler::compile_str("let x = $.a in x + x").unwrap();
459 let cfg = Cfg::build(&p);
460 let live = cfg.liveness();
461 let body_has_x = live.live_in.iter().any(|s|
463 s.iter().any(|n| n.as_ref() == "x"));
464 assert!(body_has_x, "x should be live inside let body");
465 }
466
467 #[test]
468 fn cfg_dominators_nonempty() {
469 let p = Compiler::compile_str("$.a and $.b").unwrap();
470 let cfg = Cfg::build(&p);
471 let doms = cfg.dominators();
472 assert_eq!(doms.len(), cfg.size());
473 assert_eq!(doms[cfg.entry], Some(cfg.entry));
475 }
476
477 #[test]
478 fn cfg_dominance_frontiers_sized() {
479 let p = Compiler::compile_str("$.a and $.b").unwrap();
480 let cfg = Cfg::build(&p);
481 let df = cfg.dominance_frontiers();
482 assert_eq!(df.len(), cfg.size());
483 }
484
485 #[test]
486 fn cfg_slot_allocator_distinct() {
487 let p = Compiler::compile_str("let x = $.a in let y = x + 1 in y * 2").unwrap();
488 let cfg = Cfg::build(&p);
489 let live = cfg.liveness();
490 let slots = cfg.allocate_slots(&live);
491 assert!(slots.slots.contains_key("x"));
492 assert!(slots.slots.contains_key("y"));
493 }
494}