Skip to main content

lex_bytecode/
compiler.rs

1//! M4 compiler: canonical AST → bytecode.
2
3use crate::op::*;
4use crate::program::*;
5use indexmap::IndexMap;
6use lex_ast as a;
7
8#[derive(Default)]
9struct ConstPool {
10    pool: Vec<Const>,
11    fields: IndexMap<String, u32>,
12    variants: IndexMap<String, u32>,
13    node_ids: IndexMap<String, u32>,
14    ints: IndexMap<i64, u32>,
15    bools: IndexMap<u8, u32>,
16    strs: IndexMap<String, u32>,
17    /// Interned record field-name shapes (#461). Deduplicated by content
18    /// so a record literal with the same field-name layout reuses the
19    /// same `shape_idx` across the whole program — keeps the side-table
20    /// small even when the same struct is constructed in many places.
21    record_shapes: Vec<Vec<u32>>,
22    record_shape_dedup: IndexMap<Vec<u32>, u32>,
23}
24
25impl ConstPool {
26    fn field(&mut self, name: &str) -> u32 {
27        if let Some(i) = self.fields.get(name) { return *i; }
28        let i = self.pool.len() as u32;
29        self.pool.push(Const::FieldName(name.into()));
30        self.fields.insert(name.into(), i);
31        i
32    }
33    fn variant(&mut self, name: &str) -> u32 {
34        if let Some(i) = self.variants.get(name) { return *i; }
35        let i = self.pool.len() as u32;
36        self.pool.push(Const::VariantName(name.into()));
37        self.variants.insert(name.into(), i);
38        i
39    }
40    fn node_id(&mut self, name: &str) -> u32 {
41        if let Some(i) = self.node_ids.get(name) { return *i; }
42        let i = self.pool.len() as u32;
43        self.pool.push(Const::NodeId(name.into()));
44        self.node_ids.insert(name.into(), i);
45        i
46    }
47    fn int(&mut self, n: i64) -> u32 {
48        if let Some(i) = self.ints.get(&n) { return *i; }
49        let i = self.pool.len() as u32;
50        self.pool.push(Const::Int(n));
51        self.ints.insert(n, i);
52        i
53    }
54    fn bool(&mut self, b: bool) -> u32 {
55        let key = b as u8;
56        if let Some(i) = self.bools.get(&key) { return *i; }
57        let i = self.pool.len() as u32;
58        self.pool.push(Const::Bool(b));
59        self.bools.insert(key, i);
60        i
61    }
62    fn str(&mut self, s: &str) -> u32 {
63        if let Some(i) = self.strs.get(s) { return *i; }
64        let i = self.pool.len() as u32;
65        self.pool.push(Const::Str(s.into()));
66        self.strs.insert(s.into(), i);
67        i
68    }
69    fn float(&mut self, f: f64) -> u32 {
70        // Floats: not deduped (NaN issues).
71        let i = self.pool.len() as u32;
72        self.pool.push(Const::Float(f));
73        i
74    }
75    fn unit(&mut self) -> u32 {
76        let i = self.pool.len() as u32;
77        self.pool.push(Const::Unit);
78        i
79    }
80
81    /// Intern a record field-name shape (#461). Returns the index into
82    /// `record_shapes`; identical shapes (same field-name-index vec)
83    /// always return the same index.
84    fn record_shape(&mut self, idxs: Vec<u32>) -> u32 {
85        if let Some(i) = self.record_shape_dedup.get(&idxs) {
86            return *i;
87        }
88        let i = self.record_shapes.len() as u32;
89        self.record_shape_dedup.insert(idxs.clone(), i);
90        self.record_shapes.push(idxs);
91        i
92    }
93}
94
95pub fn compile_program(stages: &[a::Stage]) -> Program {
96    let mut p = Program {
97        constants: Vec::new(),
98        functions: Vec::new(),
99        function_names: IndexMap::new(),
100        module_aliases: IndexMap::new(),
101        entry: None,
102        record_shapes: Vec::new(),
103    };
104
105    // Collect imports as alias → module-name. The module name is the part
106    // after `std.` (so `import "std.io" as io` ⇒ alias `io` → module `io`).
107    for s in stages {
108        if let a::Stage::Import(i) = s {
109            let module = i.reference.strip_prefix("std.").unwrap_or(&i.reference).to_string();
110            p.module_aliases.insert(i.alias.clone(), module);
111        }
112    }
113
114    for s in stages {
115        if let a::Stage::FnDecl(fd) = s {
116            let idx = p.functions.len() as u32;
117            p.function_names.insert(fd.name.clone(), idx);
118            p.functions.push(Function {
119                name: fd.name.clone(),
120                arity: fd.params.len() as u16,
121                locals_count: 0,
122                code: Vec::new(),
123                effects: fd.effects.iter().map(|e| DeclaredEffect {
124                    kind: e.name.clone(),
125                    arg: e.arg.as_ref().map(|a| match a {
126                        a::EffectArg::Str { value } => EffectArg::Str(value.clone()),
127                        a::EffectArg::Int { value } => EffectArg::Int(*value),
128                        a::EffectArg::Ident { value } => EffectArg::Ident(value.clone()),
129                    }),
130                }).collect(),
131                // Filled in at the end of the compile pass, once `code`
132                // and `locals_count` are final. See #222.
133                body_hash: crate::program::ZERO_BODY_HASH,
134                // Per-param refinement predicates for runtime check
135                // (#209 slice 3). Lifted directly from each param's
136                // `TypeExpr::Refined` if present; `None` otherwise.
137                refinements: fd.params.iter().map(|p| match &p.ty {
138                    a::TypeExpr::Refined { binding, predicate, .. } =>
139                        Some(crate::program::Refinement {
140                            binding: binding.clone(),
141                            predicate: (**predicate).clone(),
142                        }),
143                    _ => None,
144                }).collect(),
145                // Filled in below once the FnCompiler counts emit sites.
146                field_ic_sites: 0,
147            });
148        }
149    }
150
151    let mut pool = ConstPool::default();
152    let function_names = p.function_names.clone();
153    let module_aliases = p.module_aliases.clone();
154    let mut pending_lambdas: Vec<PendingLambda> = Vec::new();
155    // #461 slice 7: collect `type Foo = { ... }` aliases so
156    // `record_field_types` can resolve a parameter's named record
157    // type to its field layout. Without this, `r :: R` where
158    // `type R = { x :: Int, y :: Int }` falls through to Unknown
159    // and the typed-Add lowering misses on `r.x + r.y`.
160    let mut type_aliases: IndexMap<String, a::TypeExpr> = IndexMap::new();
161    for s in stages {
162        if let a::Stage::TypeDecl(td) = s {
163            // Parameterized type aliases (`type Box[T] = ...`) are
164            // out of scope for this slice — without monomorphization
165            // we can't know what T resolves to. Skip them.
166            if td.params.is_empty() {
167                type_aliases.insert(td.name.clone(), td.definition.clone());
168            }
169        }
170    }
171
172    for s in stages {
173        if let a::Stage::FnDecl(_) = s {
174            // Build a NodeId map for *this* stage so the compiler can stamp
175            // each Call/EffectCall opcode with the originating AST node.
176            let id_map = lex_ast::expr_ids(s);
177            let fd = match s { a::Stage::FnDecl(fd) => fd, _ => unreachable!() };
178            let mut fc = FnCompiler {
179                code: Vec::new(),
180                locals: IndexMap::new(),
181                next_local: 0,
182                peak_local: 0,
183                local_types: IndexMap::new(),
184                local_record_field_types: IndexMap::new(),
185                field_get_sites: 0,
186                pool: &mut pool,
187                function_names: &function_names,
188                module_aliases: &module_aliases,
189                id_map: &id_map,
190                pending_lambdas: &mut pending_lambdas,
191                next_fn_id: &mut p.functions,
192            };
193            for param in &fd.params {
194                let i = fc.next_local;
195                fc.locals.insert(param.name.clone(), i);
196                fc.local_types.insert(param.name.clone(), classify_type_expr(&param.ty));
197                // #461 slice 7: inline-record parameter (`r ::
198                // { x :: Int, y :: Int }`) — populate the per-local
199                // field-type map so `r.x + r.y` classifies as
200                // Int+Int → IntAdd, which slice 7 then fuses.
201                if let Some(ftypes) = record_field_types(&param.ty, &type_aliases) {
202                    fc.local_record_field_types.insert(param.name.clone(), ftypes);
203                }
204                fc.next_local += 1;
205                fc.peak_local = fc.next_local;
206            }
207            fc.compile_expr(&fd.body, true);
208            fc.code.push(Op::Return);
209            let code = std::mem::take(&mut fc.code);
210            let peak = fc.peak_local;
211            let field_sites = fc.field_get_sites as u16;
212            drop(fc);
213            let idx = function_names[&fd.name];
214            p.functions[idx as usize].code = code;
215            p.functions[idx as usize].field_ic_sites = field_sites;
216            p.functions[idx as usize].locals_count = peak;
217        }
218    }
219
220    // Compile pending lambdas in FIFO order. Each lambda may emit further
221    // lambdas; loop until the queue drains.
222    while let Some(pl) = pending_lambdas.pop() {
223        let id_map = std::collections::HashMap::new();
224        let mut fc = FnCompiler {
225            code: Vec::new(),
226            locals: IndexMap::new(),
227            next_local: 0,
228            peak_local: 0,
229            local_types: IndexMap::new(),
230            local_record_field_types: IndexMap::new(),
231            field_get_sites: 0,
232            pool: &mut pool,
233            function_names: &function_names,
234            module_aliases: &module_aliases,
235            id_map: &id_map,
236            pending_lambdas: &mut pending_lambdas,
237            next_fn_id: &mut p.functions,
238        };
239        for name in &pl.capture_names {
240            let i = fc.next_local;
241            fc.locals.insert(name.clone(), i);
242            // Captures' static types aren't known at this layer
243            // — the closure's environment carries them dynamically.
244            // Conservative fallback; binop lowering stays correct
245            // because Unknown classifies through to NumAdd.
246            fc.local_types.insert(name.clone(), NumTy::Unknown);
247            fc.next_local += 1;
248            fc.peak_local = fc.next_local;
249        }
250        for p in &pl.params {
251            let i = fc.next_local;
252            fc.locals.insert(p.name.clone(), i);
253            fc.local_types.insert(p.name.clone(), classify_type_expr(&p.ty));
254            fc.next_local += 1;
255            fc.peak_local = fc.next_local;
256        }
257        fc.compile_expr(&pl.body, true);
258        fc.code.push(Op::Return);
259        let code = std::mem::take(&mut fc.code);
260        let peak = fc.peak_local;
261        let field_sites = fc.field_get_sites as u16;
262        drop(fc);
263        p.functions[pl.fn_id as usize].code = code;
264        p.functions[pl.fn_id as usize].field_ic_sites = field_sites;
265        p.functions[pl.fn_id as usize].locals_count = peak;
266    }
267
268    // #464 step 2: escape-analysis-driven lowering. Rewrites
269    // `MakeRecord` at non-escaping sites to `AllocStackRecord`, which
270    // the VM allocates in the frame's stack-record arena instead of
271    // on the heap. Runs on raw bytecode (before the peephole passes)
272    // so the escape analysis — which itself walks raw bytecode — sees
273    // exactly the program it was designed for.
274    //
275    // The peephole passes that follow do not match on MakeRecord /
276    // AllocStackRecord, so swapping one for the other doesn't disturb
277    // any pattern. `compute_body_hash` lowers AllocStackRecord back
278    // to the legacy MakeRecord form (#222), so closure identity is
279    // invariant under this lowering.
280    //
281    // Escape hatch: `LEX_NO_STACK_RECORDS=1` skips the lowering
282    // entirely (#464 step 3). The flag exists so the bench can A/B
283    // the same source under matched VM/peephole conditions; in
284    // production code the pass always runs.
285    if std::env::var_os("LEX_NO_STACK_RECORDS").is_none() {
286        let escape_index = crate::escape::build_escape_index(&p.functions);
287        for f in p.functions.iter_mut() {
288            apply_escape_lowering(&mut f.code, &f.name, &escape_index);
289        }
290    }
291
292    // Peephole pass (#461 superinstructions). Rewrites fusable opcode
293    // patterns into single dispatch steps. Runs before `body_hash`
294    // computation, but `compute_body_hash` decomposes each fused op
295    // back to its primitive form on hash — so closure identity (#222)
296    // is invariant under this pass and the order doesn't matter.
297    //
298    // Slices run sequentially: slice 2 looks for slice-1 output
299    // followed by a StoreLocal, so it must follow slice 1. Slice 3
300    // (LoadLocal + LoadLocal + IntAdd) is disjoint from both — its
301    // second slot is LoadLocal, not PushConst — so it can run in
302    // either order. Run it last to keep the slice 1/2 contract
303    // (slice 2 expects to see slice-1 output) untouched. Slice 4 is
304    // slice 3 for IntSub / IntMul (same pattern, different terminator);
305    // disjoint from every prior slice because the terminator op
306    // disambiguates, so order between slice 3 and slice 4 is free.
307    for f in p.functions.iter_mut() {
308        apply_peephole(&mut f.code, &pool.pool);
309        apply_peephole_slice2(&mut f.code);
310        apply_peephole_slice3(&mut f.code);
311        apply_peephole_slice4(&mut f.code);
312        // Slice 5 — jump-aware fusion of the loop-condition idiom
313        // (LoadLocal + LoadLocal/PushConst + IntLt + JumpIfNot).
314        // Runs after slices 3/4 because their 3-slot windows
315        // overlap slice 5's 4-slot window at position 0 and 1; if
316        // slice 3 fired first and consumed `LoadLocal + LoadLocal +
317        // IntAdd`, the `IntLt + JumpIfNot` that follows would not
318        // be a fusion candidate. Since slice 3's terminator is
319        // `IntAdd` and slice 5's is `IntLt`, the two don't compete
320        // on the same site — order between them is technically free
321        // but conventionally slice N runs after slice N-1.
322        apply_peephole_slice5(&mut f.code, &pool.pool);
323        // Slice 6 — absorb the match-scrutinee dance
324        // (`LoadLocal + StoreLocal` immediately preceding a slice-5
325        // fused op that reads the just-stored local). Must run after
326        // slice 5 since it matches on slice 5's output.
327        apply_peephole_slice6(&mut f.code);
328        // Slice 7/8 — fuse `LoadLocal + GetField + IntAdd|IntSub|IntMul`,
329        // the accumulator-with-field-read idiom. Disjoint from every
330        // earlier slice (only this one matches a GetField at slot 1),
331        // so order is independent — placed near the end for chronology.
332        apply_peephole_slice7(&mut f.code);
333        // Slice 9 — fuse the *remaining* bare `LoadLocal + GetField`
334        // pairs (those slice 7/8 didn't consume): chain-head field
335        // reads (`r.x` in `r.x + r.y`), standalone `r.field` reads
336        // (`r.total`), and field reads feeding non-add/sub/mul ops.
337        // MUST run after slice 7/8 — otherwise it would greedily eat
338        // the `LoadLocal + GetField` prefix of an `acc OP r.field`
339        // triple and prevent the 3-op fusion. Slice 7/8's tombstone
340        // GetFields are preceded by their fused op (not a bare
341        // LoadLocal), so slice 9 never matches them.
342        apply_peephole_slice9(&mut f.code);
343    }
344
345    // Final pass: stamp every function with its content hash now that
346    // every body is finalized (#222). Trampolines installed via
347    // `install_trampoline` already have it; recomputing is cheap and
348    // makes the invariant easier to read at this top level.
349    for f in p.functions.iter_mut() {
350        if f.body_hash == crate::program::ZERO_BODY_HASH {
351            f.body_hash = crate::program::compute_body_hash(
352                f.arity, f.locals_count, &f.code, &pool.record_shapes);
353        }
354    }
355
356    p.constants = pool.pool;
357    p.record_shapes = pool.record_shapes;
358    p
359}
360
361/// Peephole pass: rewrite fusable opcode patterns into superinstructions
362/// (#461). Each fused op claims its own slot in the code stream; the
363/// trailing primitive ops it absorbs stay in place as inert
364/// "tombstones" — the dispatch loop overrides its default `pc += 1`
365/// to step past them. Leaving the tombstones in place keeps
366/// `code.len()` invariant and means we don't have to renumber jump
367/// offsets.
368///
369/// Pattern (slice 1): `LoadLocal(i), PushConst(c), IntAdd` where
370/// `constants[c]` is a `Const::Int`. Fused to
371/// `LoadLocalAddIntConst { local_idx: i, imm_const_idx: c }`.
372/// Safety: the second and third slots must not be reachable from
373/// any Jump / JumpIf / JumpIfNot — otherwise a jump would land on a
374/// tombstone instead of the live op the source intended. The
375/// pre-pass below collects every jump target in the function and
376/// skips fusion sites whose tombstones overlap one.
377/// #464 step 2 — rewrite `MakeRecord` to `AllocStackRecord` at sites
378/// the escape analysis (`crate::escape::build_escape_index`) proved
379/// non-escaping. Each rewrite is a single-slot swap that preserves
380/// pc, stack delta, and shape semantics — jump targets, the peephole
381/// passes downstream, and the body-hash decoder all see the same
382/// program shape they would have seen for the unlowered code.
383///
384/// Sites that escape are left as-is and still incur the
385/// IndexMap-backed heap allocation. Step 3 of #464 carries the
386/// bench acceptance bars (≥1.5× speedup on `response_build`); this
387/// pass is the precondition.
388fn apply_escape_lowering(
389    code: &mut [Op],
390    fn_name: &str,
391    escape_index: &std::collections::HashMap<(String, u32), bool>,
392) {
393    for (pc, op) in code.iter_mut().enumerate() {
394        // Look up this (fn, pc) in the escape index. Absent → analysis
395        // didn't observe the site (defensive: leave on heap path).
396        // Present and false → safe to stack-allocate. Each rewrite is a
397        // single-slot swap preserving pc / stack delta, so jump
398        // targets, downstream peephole passes, and the body-hash
399        // decoder all see the same program shape.
400        let key = (fn_name.to_string(), pc as u32);
401        if !matches!(escape_index.get(&key), Some(false)) {
402            continue;
403        }
404        match *op {
405            Op::MakeRecord { shape_idx, field_count } => {
406                *op = Op::AllocStackRecord { shape_idx, field_count };
407            }
408            // #464 tuple codegen: same single-slot swap as records.
409            Op::MakeTuple(arity) => {
410                *op = Op::AllocStackTuple { arity };
411            }
412            _ => {}
413        }
414    }
415}
416
417fn apply_peephole(code: &mut [Op], constants: &[Const]) {
418    if code.len() < 3 { return; }
419    let jump_targets = collect_jump_targets(code);
420
421    let n = code.len();
422    let mut k = 0;
423    while k + 2 < n {
424        if let (Op::LoadLocal(local_idx), Op::PushConst(imm_const_idx), Op::IntAdd)
425            = (code[k], code[k + 1], code[k + 2])
426        {
427            let imm_is_int = matches!(
428                constants.get(imm_const_idx as usize),
429                Some(Const::Int(_))
430            );
431            // Tombstones at k+1 and k+2 must not be jump targets;
432            // k itself can be a target (it stays a live op — the
433            // fused form executes the same semantics in one step).
434            let safe = imm_is_int
435                && !jump_targets.contains(&(k + 1))
436                && !jump_targets.contains(&(k + 2));
437            if safe {
438                code[k] = Op::LoadLocalAddIntConst { local_idx, imm_const_idx };
439                k += 3;
440                continue;
441            }
442        }
443        k += 1;
444    }
445}
446
447/// Slice 2: fuse `[LoadLocalAddIntConst, _, _, StoreLocal(dest)]`
448/// into `LoadLocalAddIntConstStoreLocal { src, imm_const_idx, dest }`.
449/// The two `_` slots are slice-1 tombstones (the original PushConst
450/// and IntAdd) and stay in place as slice-2 tombstones too. The
451/// dispatch loop advances pc by 4 past all three trailing slots
452/// after executing the fused op.
453fn apply_peephole_slice2(code: &mut [Op]) {
454    if code.len() < 4 { return; }
455    let jump_targets = collect_jump_targets(code);
456
457    let n = code.len();
458    let mut k = 0;
459    while k + 3 < n {
460        if let (
461            Op::LoadLocalAddIntConst { local_idx: src, imm_const_idx },
462            _,
463            _,
464            Op::StoreLocal(dest),
465        ) = (code[k], code[k + 1], code[k + 2], code[k + 3])
466        {
467            // Slice-1 contract: code[k+1] is the original
468            // PushConst(imm_const_idx) and code[k+2] is the
469            // original IntAdd. We don't re-verify those — slice 1
470            // is the only producer of LoadLocalAddIntConst and
471            // always leaves the contract intact.
472            //
473            // Safety: tombstones at k+1..k+3 must not be reachable
474            // from any jump. k itself can be (it's still a live
475            // op carrying the same semantics).
476            let safe = !jump_targets.contains(&(k + 1))
477                && !jump_targets.contains(&(k + 2))
478                && !jump_targets.contains(&(k + 3));
479            if safe {
480                code[k] = Op::LoadLocalAddIntConstStoreLocal {
481                    src,
482                    imm_const_idx,
483                    dest,
484                };
485                k += 4;
486                continue;
487            }
488        }
489        k += 1;
490    }
491}
492
493/// Slice 3: fuse `[LoadLocal(lhs), LoadLocal(rhs), IntAdd]` into
494/// `LoadLocalAddLocal { lhs_idx, rhs_idx }`. The binary-op-on-two-
495/// locals idiom: any `a + b` where both operands compile to a
496/// `LoadLocal` (typed `Int`). Mirrors slice 1's shape exactly — the
497/// trailing `LoadLocal` + `IntAdd` stay in place as inert tombstones
498/// with cancelling stack deltas (+1, -1), so the verifier and
499/// body-hash decoder both keep walking them as live.
500///
501/// Disjoint from slice 1: the second slot disambiguates (LoadLocal
502/// vs PushConst), so a site can match at most one of the two. Runs
503/// after slice 2 so we don't accidentally consume a `LoadLocal` slot
504/// that slice 2 was about to fuse into a `*StoreLocal` superop (and
505/// to keep slice 2's input contract — slice-1 output followed by
506/// StoreLocal — untouched).
507///
508/// Safety: like slice 1, the trailing two slots must not be jump
509/// targets. The first slot can be a target (it stays a live op).
510fn apply_peephole_slice3(code: &mut [Op]) {
511    if code.len() < 3 { return; }
512    let jump_targets = collect_jump_targets(code);
513
514    let n = code.len();
515    let mut k = 0;
516    while k + 2 < n {
517        if let (Op::LoadLocal(lhs_idx), Op::LoadLocal(rhs_idx), Op::IntAdd)
518            = (code[k], code[k + 1], code[k + 2])
519        {
520            let safe = !jump_targets.contains(&(k + 1))
521                && !jump_targets.contains(&(k + 2));
522            if safe {
523                code[k] = Op::LoadLocalAddLocal { lhs_idx, rhs_idx };
524                k += 3;
525                continue;
526            }
527        }
528        k += 1;
529    }
530}
531
532/// Slice 4: slice 3 for `IntSub` and `IntMul`. Fuses
533/// `[LoadLocal(lhs), LoadLocal(rhs), IntSub]` to
534/// `LoadLocalSubLocal { lhs_idx, rhs_idx }` and the `IntMul` shape
535/// to `LoadLocalMulLocal`. Same tombstone, jump-safety, and
536/// body-hash story as slice 3 — the trailing two slots stay as
537/// inert primitives with cancelling stack deltas.
538///
539/// Disjoint from every prior slice: slice 1/2 require a `PushConst`
540/// at slot 2 (here it's `LoadLocal`), and slice 3's terminator is
541/// `IntAdd` (here it's `IntSub` / `IntMul`). A given site matches at
542/// most one slice.
543fn apply_peephole_slice4(code: &mut [Op]) {
544    if code.len() < 3 { return; }
545    let jump_targets = collect_jump_targets(code);
546
547    let n = code.len();
548    let mut k = 0;
549    while k + 2 < n {
550        if let (Op::LoadLocal(lhs_idx), Op::LoadLocal(rhs_idx), terminator)
551            = (code[k], code[k + 1], code[k + 2])
552        {
553            let fused = match terminator {
554                Op::IntSub => Some(Op::LoadLocalSubLocal { lhs_idx, rhs_idx }),
555                Op::IntMul => Some(Op::LoadLocalMulLocal { lhs_idx, rhs_idx }),
556                _ => None,
557            };
558            if let Some(fused_op) = fused {
559                let safe = !jump_targets.contains(&(k + 1))
560                    && !jump_targets.contains(&(k + 2));
561                if safe {
562                    code[k] = fused_op;
563                    k += 3;
564                    continue;
565                }
566            }
567        }
568        k += 1;
569    }
570}
571
572/// Slice 5: fuse the loop-condition idiom — 4-slot window
573/// `[LoadLocal, LoadLocal|PushConst, IntLt, JumpIfNot(offset)]` —
574/// into `LoadLocalLtLocalJumpIfNot` or `LoadLocalLtIntConstJumpIfNot`.
575/// First jump-aware peephole in this codebase: the fused op carries
576/// the JumpIfNot's offset and the VM dispatches directly to either
577/// `pc + 4` (condition true, fall through past tombstones) or
578/// `pc + 4 + offset` (condition false, original JumpIfNot target).
579///
580/// Safety conditions, on top of slice 1's "tombstones must not be
581/// jump targets":
582/// 1. Trailing 3 slots (k+1, k+2, k+3) must not be jump targets from
583///    elsewhere — same as slice 1/3/4, just three of them.
584/// 2. The slot at k+3 (JumpIfNot) is the one whose offset we copy
585///    into the fused op. The offset is relative to the JumpIfNot's
586///    `pc + 1` which equals `k + 4`, so the resolved target is
587///    `k + 4 + offset` — that target must be safe to land on (it
588///    already is, since JumpIfNot is operating as designed).
589/// 3. The const-int branch checks the PushConst points at a
590///    `Const::Int` — same as slice 1.
591fn apply_peephole_slice5(code: &mut [Op], constants: &[Const]) {
592    if code.len() < 4 { return; }
593    let jump_targets = collect_jump_targets(code);
594
595    let n = code.len();
596    let mut k = 0;
597    while k + 3 < n {
598        // Match the lhs slot — always a LoadLocal.
599        let lhs_idx = match code[k] {
600            Op::LoadLocal(i) => i,
601            _ => { k += 1; continue; }
602        };
603        // Match the rhs slot — either LoadLocal or PushConst(Int).
604        // The two flavors emit different fused ops.
605        let fused = match (code[k + 1], code[k + 2], code[k + 3]) {
606            (Op::PushConst(imm_const_idx), Op::IntEq, Op::JumpIfNot(jump_offset))
607                if matches!(constants.get(imm_const_idx as usize), Some(Const::Int(_))) =>
608                Some(Op::LoadLocalEqIntConstJumpIfNot {
609                    local_idx: lhs_idx, imm_const_idx, jump_offset,
610                }),
611            _ => None,
612        };
613        if let Some(fused_op) = fused {
614            let safe = !jump_targets.contains(&(k + 1))
615                && !jump_targets.contains(&(k + 2))
616                && !jump_targets.contains(&(k + 3));
617            if safe {
618                code[k] = fused_op;
619                k += 4;
620                continue;
621            }
622        }
623        k += 1;
624    }
625}
626
627/// Slice 6: fuse the match-scrutinee dance preceding a slice-5
628/// pattern-match arm test. 3-slot window
629/// `[LoadLocal(src), StoreLocal(dst),
630///   LoadLocalEqIntConstJumpIfNot { local_idx: dst, ... }]` —
631/// where the slice-5 op's `local_idx` matches the StoreLocal's
632/// destination — rewrites to
633/// `LoadLocalStoreEqIntConstJumpIfNot { src, dst, ... }` at slot k.
634/// The fused op carries `dst` so it can mirror the original
635/// StoreLocal (later arm tests in the same match keep reading
636/// `locals[dst]`).
637///
638/// Trailing tombstones: 5 slots (the original StoreLocal + the
639/// slice-5 fused op itself + slice 5's 3 primitive tombstones).
640/// VM dispatch skips them via `pc + 6`; verifier override pushes
641/// `(pc + 6, ...)` and the branch target `(pc + 6 + jump_offset, ...)`
642/// — the offset is identical to what slice 5 stored (still relative
643/// to the original JumpIfNot's `pc + 1`, now at `k + 5 + 1 = k + 6`).
644///
645/// Safety: slots k+1..=k+5 must not be jump targets — same window
646/// safety as the other slices. Slice 5 already verified k+3..=k+5
647/// weren't jump targets when it fused; slice 6 only needs to re-check
648/// k+1 (the StoreLocal) and k+2 (the slice-5 fused op).
649fn apply_peephole_slice6(code: &mut [Op]) {
650    if code.len() < 3 { return; }
651    let jump_targets = collect_jump_targets(code);
652
653    let n = code.len();
654    let mut k = 0;
655    while k + 2 < n {
656        if let (
657            Op::LoadLocal(src),
658            Op::StoreLocal(dst),
659            Op::LoadLocalEqIntConstJumpIfNot { local_idx, imm_const_idx, jump_offset },
660        ) = (code[k], code[k + 1], code[k + 2]) {
661            // The slice-5 op must read the very local the StoreLocal
662            // just wrote; if it reads some other local this isn't the
663            // match-scrutinee idiom (could be a coincidental sequence).
664            if local_idx == dst {
665                let safe = !jump_targets.contains(&(k + 1))
666                    && !jump_targets.contains(&(k + 2));
667                if safe {
668                    code[k] = Op::LoadLocalStoreEqIntConstJumpIfNot {
669                        src, dst, imm_const_idx, jump_offset,
670                    };
671                    // Skip past this slice-6 window. The slice-5
672                    // tombstones at k+3..=k+5 are already handled by
673                    // slice 5's earlier rewrite; we don't need to
674                    // touch them.
675                    k += 3;
676                    continue;
677                }
678            }
679        }
680        k += 1;
681    }
682}
683
684/// Slice 7/8: fuse `[LoadLocal(local_idx), GetField{name_idx,
685/// site_idx}, IntAdd|IntSub|IntMul]` into the matching
686/// `LoadLocalGetField{Add,Sub,Mul} { local_idx, name_idx, site_idx }`.
687///
688/// Fires on the `acc OP r.field` accumulator-with-field-read idiom —
689/// the bytecode the compiler emits for `prev_expr OP record.field`
690/// once `prev_expr` is on the stack. Common in handler-shaped code
691/// like `r.x + r.y + r.z` (the LHS of each operator after the first
692/// matches this pattern), `acc + items[i].weight` reductions, and
693/// the `v.l - v.m` / `v.h * v.k` mixes the `response_build` profile
694/// exercises.
695///
696/// Disjoint from every prior slice: slice 1 wants `PushConst` at
697/// slot 1; slices 3-4 want `LoadLocal` at slot 1; slice 5 wants
698/// a 4-slot window with `IntEq + JumpIfNot` terminator. Only this
699/// slice matches a `GetField` at slot 1.
700///
701/// Order: must run after slice 4 (so the disjointness analysis
702/// holds — slice 3/4 patterns with a trailing IntAdd / IntSub /
703/// IntMul never carry a GetField at slot 1 and don't compete);
704/// must run before / independent of slice 5/6, which don't match
705/// any slot in this window.
706///
707/// Safety: trailing two slots (the original `GetField` and the
708/// arithmetic op) must not be jump targets. The first slot can be.
709fn apply_peephole_slice7(code: &mut [Op]) {
710    if code.len() < 3 { return; }
711    let jump_targets = collect_jump_targets(code);
712
713    let n = code.len();
714    let mut k = 0;
715    while k + 2 < n {
716        if let (Op::LoadLocal(local_idx), Op::GetField { name_idx, site_idx })
717            = (code[k], code[k + 1])
718        {
719            let fused = match code[k + 2] {
720                Op::IntAdd => Some(Op::LoadLocalGetFieldAdd { local_idx, name_idx, site_idx }),
721                Op::IntSub => Some(Op::LoadLocalGetFieldSub { local_idx, name_idx, site_idx }),
722                Op::IntMul => Some(Op::LoadLocalGetFieldMul { local_idx, name_idx, site_idx }),
723                _ => None,
724            };
725            if let Some(op) = fused {
726                let safe = !jump_targets.contains(&(k + 1))
727                    && !jump_targets.contains(&(k + 2));
728                if safe {
729                    code[k] = op;
730                    k += 3;
731                    continue;
732                }
733            }
734        }
735        k += 1;
736    }
737}
738
739/// Slice 9: fuse the bare `[LoadLocal(local_idx), GetField{name_idx,
740/// site_idx}]` pair into `LoadLocalGetField { local_idx, name_idx,
741/// site_idx }` — the plain `record.field` read, the most common
742/// field-access shape.
743///
744/// The win is allocation, not just one fewer dispatch: the unfused
745/// pair clones the entire record onto the value stack (a
746/// `Box<IndexMap>` for a heap record) only to read one field; the
747/// fused op reads the field out of the local by reference and clones
748/// only that value. On `response_build` the whole-record clone of the
749/// returned `Response` (`r.total`) was the dominant malloc source.
750///
751/// Order: MUST run after slice 7/8. Those fuse `[LoadLocal, GetField,
752/// IntAdd|IntSub|IntMul]`; if slice 9 ran first it would consume the
753/// `LoadLocal + GetField` prefix and block the 3-op fusion. After
754/// slice 7/8, the only remaining `[LoadLocal, GetField]` pairs are
755/// the ones they didn't want (chain heads, standalone reads, field
756/// reads feeding other ops). Slice 7/8's tombstone GetFields sit
757/// after their fused op, never after a bare `LoadLocal`, so slice 9
758/// won't touch them.
759///
760/// Safety: the trailing slot (the original `GetField`) must not be a
761/// jump target. The first slot can be.
762fn apply_peephole_slice9(code: &mut [Op]) {
763    if code.len() < 2 { return; }
764    let jump_targets = collect_jump_targets(code);
765
766    let n = code.len();
767    let mut k = 0;
768    while k + 1 < n {
769        if let (Op::LoadLocal(local_idx), Op::GetField { name_idx, site_idx })
770            = (code[k], code[k + 1])
771        {
772            if !jump_targets.contains(&(k + 1)) {
773                code[k] = Op::LoadLocalGetField { local_idx, name_idx, site_idx };
774                k += 2;
775                continue;
776            }
777        }
778        k += 1;
779    }
780}
781
782fn collect_jump_targets(code: &[Op]) -> std::collections::HashSet<usize> {
783    let mut targets = std::collections::HashSet::new();
784    for (pc, op) in code.iter().enumerate() {
785        let off = match op {
786            Op::Jump(off) | Op::JumpIf(off) | Op::JumpIfNot(off) => Some(*off),
787            _ => None,
788        };
789        if let Some(off) = off {
790            let target = (pc as i32 + 1 + off) as usize;
791            targets.insert(target);
792        }
793    }
794    targets
795}
796
797#[derive(Debug, Clone)]
798struct PendingLambda {
799    fn_id: u32,
800    /// Names of captured outer-scope locals, in order.
801    capture_names: Vec<String>,
802    params: Vec<a::Param>,
803    body: a::CExpr,
804}
805
806struct FnCompiler<'a> {
807    code: Vec<Op>,
808    locals: IndexMap<String, u16>,
809    next_local: u16,
810    /// Peak local usage seen during compilation (for VM frame sizing).
811    peak_local: u16,
812    /// Inferred numeric type of each local for typed numeric-op
813    /// lowering (#461). Populated when binding function parameters
814    /// (from their declared `TypeExpr::Named { name: "Int", .. }`
815    /// or `"Float"`) and when binding `let name := value` where
816    /// the RHS classifies statically. Used by `compile_binop` to
817    /// emit `Op::IntAdd` / `Op::FloatAdd` instead of the
818    /// polymorphic `Op::NumAdd` when both operands' types are
819    /// statically known. Conservative: falls back to `NumTy::Unknown`
820    /// (and the polymorphic op) whenever a type isn't locally
821    /// derivable.
822    ///
823    /// Keyed by local *name* (parallel to `locals`) rather than by
824    /// slot index so shadowed bindings are handled correctly via
825    /// `IndexMap`'s insertion-order semantics.
826    local_types: IndexMap<String, NumTy>,
827    /// Per-local map of statically-known field types (#461 slice 7).
828    /// Populated when a local is bound from a `RecordLit` whose
829    /// fields all classify to non-`Unknown` `NumTy`s. Lets
830    /// `classify_expr(FieldAccess { value: Var(name), field })`
831    /// return a precise `NumTy` instead of falling back to
832    /// `Unknown` — which in turn unlocks the typed-Add lowering
833    /// (`+` over two Ints → `IntAdd`) on `r.field + r.field`
834    /// chains, which slice 7 then fuses into
835    /// `LoadLocalGetFieldAdd`.
836    ///
837    /// Only the literal-binding case is tracked here; annotated
838    /// `let r :: R := ...` would require resolving the type alias
839    /// `R` to its field-type map, which the compiler doesn't yet
840    /// have. Future slice work.
841    local_record_field_types: IndexMap<String, IndexMap<String, NumTy>>,
842    /// Per-function counter for `Op::GetField` site indices (#462
843    /// slice 1). Each `Op::GetField` emit allocates the next index
844    /// here, giving every field-access site within this function a
845    /// stable identifier independent of pc. The VM uses
846    /// `(fn_id, site_idx)` as the inline-cache key, so the cache
847    /// survives the future dispatch rewrite (#461) and a JIT (#465).
848    field_get_sites: u32,
849    pool: &'a mut ConstPool,
850    function_names: &'a IndexMap<String, u32>,
851    module_aliases: &'a IndexMap<String, String>,
852    /// CExpr address → NodeId, populated per stage via `lex_ast::expr_ids`.
853    id_map: &'a std::collections::HashMap<*const a::CExpr, lex_ast::NodeId>,
854    /// Queue of lambdas discovered during compilation; each gets a fresh
855    /// fn_id and is compiled in a later pass.
856    pending_lambdas: &'a mut Vec<PendingLambda>,
857    /// Mutable view of the function table — used to allocate fn_ids for
858    /// freshly-discovered lambdas.
859    next_fn_id: &'a mut Vec<Function>,
860}
861
862/// Lightweight numeric-type classification used by `compile_binop`
863/// to decide whether to emit `IntAdd` / `FloatAdd` (specialized,
864/// fast) or `NumAdd` (polymorphic, runtime-typed dispatch). #461
865/// typed-lowering pass — conservative: anything not provably one
866/// of these returns `Unknown` and falls back to the polymorphic op.
867#[derive(Debug, Clone, Copy, PartialEq, Eq)]
868enum NumTy { Int, Float, Unknown }
869
870/// #461 slice 7: extract a `field_name -> NumTy` map from a record
871/// type expression. Resolves named types (`r :: R`) via
872/// `type_aliases` and returns `None` if the type ultimately isn't
873/// a record literal.
874fn record_field_types(
875    ty: &a::TypeExpr,
876    type_aliases: &IndexMap<String, a::TypeExpr>,
877) -> Option<IndexMap<String, NumTy>> {
878    match ty {
879        a::TypeExpr::Record { fields } => {
880            let mut m = IndexMap::new();
881            for f in fields {
882                m.insert(f.name.clone(), classify_type_expr(&f.ty));
883            }
884            Some(m)
885        }
886        a::TypeExpr::Refined { base, .. } => record_field_types(base, type_aliases),
887        a::TypeExpr::Named { name, args } if args.is_empty() => {
888            // Resolve the alias and recurse. Cycle protection isn't
889            // needed here — a cyclic type alias would have been
890            // rejected by `lex-types::check_program` upstream.
891            type_aliases.get(name).and_then(|t| record_field_types(t, type_aliases))
892        }
893        _ => None,
894    }
895}
896
897fn classify_type_expr(ty: &a::TypeExpr) -> NumTy {
898    match ty {
899        a::TypeExpr::Named { name, args } if args.is_empty() => match name.as_str() {
900            "Int" => NumTy::Int,
901            "Float" => NumTy::Float,
902            _ => NumTy::Unknown,
903        },
904        // `Refined { base, .. }` (#209) — classify by the base type;
905        // the refinement predicate doesn't change the value's primitive shape.
906        a::TypeExpr::Refined { base, .. } => classify_type_expr(base),
907        _ => NumTy::Unknown,
908    }
909}
910
911impl<'a> FnCompiler<'a> {
912    fn alloc_local(&mut self, name: &str) -> u16 {
913        let i = self.next_local;
914        self.locals.insert(name.into(), i);
915        self.next_local += 1;
916        if self.next_local > self.peak_local { self.peak_local = self.next_local; }
917        i
918    }
919    fn emit(&mut self, op: Op) { self.code.push(op); }
920
921    fn compile_expr(&mut self, e: &a::CExpr, tail: bool) {
922        match e {
923            a::CExpr::Literal { value } => self.compile_lit(value),
924            a::CExpr::Var { name } => {
925                if let Some(slot) = self.locals.get(name) {
926                    self.emit(Op::LoadLocal(*slot));
927                } else if let Some(&fn_id) = self.function_names.get(name) {
928                    // Function name used as a *value* (e.g. as a record-field
929                    // initializer or fold-callback arg) — materialize it as a
930                    // closure with no captures. The runtime already accepts
931                    // `Value::Closure { fn_id, captures: vec![] }` and
932                    // `CallClosure` dispatches it. (#169)
933                    self.emit(Op::MakeClosure { fn_id, capture_count: 0 });
934                } else {
935                    // Should be caught at type-check time; the type checker
936                    // walks every Var. If we land here it's a compiler bug,
937                    // not a user typo.
938                    panic!("unknown var in compiler: {name}");
939                }
940            }
941            a::CExpr::Let { name, ty, value, body } => {
942                // Classify the RHS for typed-op lowering (#461). Prefer
943                // the declared annotation when present (cheap O(1)
944                // lookup); fall back to classifying the value
945                // expression structurally.
946                let nty = match ty {
947                    Some(t) => classify_type_expr(t),
948                    None => self.classify_expr(value),
949                };
950                // #461 slice 7: when the RHS is a record literal,
951                // remember the field types so `name.field` accesses
952                // downstream can classify precisely. Without this,
953                // `r.x + r.y` falls through to `NumAdd`, blocking
954                // the slice-7 fusion.
955                if let a::CExpr::RecordLit { fields } = value.as_ref() {
956                    let mut ftypes = IndexMap::new();
957                    for f in fields {
958                        let fty = self.classify_expr(&f.value);
959                        ftypes.insert(f.name.clone(), fty);
960                    }
961                    self.local_record_field_types.insert(name.clone(), ftypes);
962                }
963                self.compile_expr(value, false);
964                let slot = self.alloc_local(name);
965                self.local_types.insert(name.clone(), nty);
966                self.emit(Op::StoreLocal(slot));
967                self.compile_expr(body, tail);
968            }
969            a::CExpr::Block { statements, result } => {
970                for s in statements {
971                    self.compile_expr(s, false);
972                    self.emit(Op::Pop);
973                }
974                self.compile_expr(result, tail);
975            }
976            a::CExpr::Call { callee, args } => self.compile_call(e, callee, args, tail),
977            a::CExpr::Constructor { name, args } => {
978                for a in args { self.compile_expr(a, false); }
979                let name_idx = self.pool.variant(name);
980                self.emit(Op::MakeVariant { name_idx, arity: args.len() as u16 });
981            }
982            a::CExpr::Match { scrutinee, arms } => self.compile_match(scrutinee, arms, tail),
983            a::CExpr::RecordLit { fields } => {
984                let mut idxs = Vec::with_capacity(fields.len());
985                for f in fields {
986                    self.compile_expr(&f.value, false);
987                    idxs.push(self.pool.field(&f.name));
988                }
989                let field_count = idxs.len() as u16;
990                let shape_idx = self.pool.record_shape(idxs);
991                self.emit(Op::MakeRecord { shape_idx, field_count });
992            }
993            a::CExpr::TupleLit { items } => {
994                for it in items { self.compile_expr(it, false); }
995                self.emit(Op::MakeTuple(items.len() as u16));
996            }
997            a::CExpr::ListLit { items } => {
998                for it in items { self.compile_expr(it, false); }
999                self.emit(Op::MakeList(items.len() as u32));
1000            }
1001            a::CExpr::FieldAccess { value, field } => {
1002                self.compile_expr(value, false);
1003                let name_idx = self.pool.field(field);
1004                let site_idx = self.field_get_sites;
1005                self.field_get_sites += 1;
1006                self.emit(Op::GetField { name_idx, site_idx });
1007            }
1008            a::CExpr::BinOp { op, lhs, rhs } => self.compile_binop(op, lhs, rhs),
1009            a::CExpr::UnaryOp { op, expr } => {
1010                self.compile_expr(expr, false);
1011                match op.as_str() {
1012                    "-" => self.emit(Op::NumNeg),
1013                    "not" => self.emit(Op::BoolNot),
1014                    other => panic!("unknown unary: {other}"),
1015                }
1016            }
1017            a::CExpr::Lambda { params, body, .. } => self.compile_lambda(params, body),
1018            a::CExpr::Return { value } => {
1019                self.compile_expr(value, true);
1020                self.emit(Op::Return);
1021            }
1022        }
1023    }
1024
1025    fn compile_lit(&mut self, l: &a::CLit) {
1026        let i = match l {
1027            a::CLit::Int { value } => self.pool.int(*value),
1028            a::CLit::Bool { value } => self.pool.bool(*value),
1029            a::CLit::Float { value } => {
1030                let f: f64 = value.parse().unwrap_or(0.0);
1031                self.pool.float(f)
1032            }
1033            a::CLit::Str { value } => self.pool.str(value),
1034            a::CLit::Bytes { value: _ } => {
1035                // Stub: M4 doesn't use bytes literals in §3.13 examples.
1036                let i = self.pool.pool.len() as u32;
1037                self.pool.pool.push(Const::Bytes(Vec::new()));
1038                i
1039            }
1040            a::CLit::Unit => self.pool.unit(),
1041        };
1042        self.emit(Op::PushConst(i));
1043    }
1044
1045    fn compile_call(&mut self, call_expr: &a::CExpr, callee: &a::CExpr, args: &[a::CExpr], tail: bool) {
1046        let node_id = self
1047            .id_map
1048            .get(&(call_expr as *const a::CExpr))
1049            .map(|n| n.as_str().to_string())
1050            .unwrap_or_else(|| "n_?".into());
1051        let node_id_idx = self.pool.node_id(&node_id);
1052
1053        // Module function call: `alias.op(args)` where `alias` is an imported
1054        // module ⇒ EffectCall, except for higher-order pure ops where we
1055        // emit inline bytecode using CallClosure (the closure-arg can't be
1056        // serialized through the effect handler).
1057        if let a::CExpr::FieldAccess { value, field } = callee {
1058            if let a::CExpr::Var { name } = value.as_ref() {
1059                if let Some(module) = self.module_aliases.get(name) {
1060                    if self.try_emit_higher_order(module, field, args, node_id_idx) {
1061                        let _ = tail;
1062                        return;
1063                    }
1064                    for a in args { self.compile_expr(a, false); }
1065                    let kind_idx = self.pool.str(module);
1066                    let op_idx = self.pool.str(field);
1067                    self.emit(Op::EffectCall {
1068                        kind_idx,
1069                        op_idx,
1070                        arity: args.len() as u16,
1071                        node_id_idx,
1072                    });
1073                    let _ = tail;
1074                    return;
1075                }
1076            }
1077        }
1078        match callee {
1079            a::CExpr::Var { name } if self.function_names.contains_key(name) => {
1080                for a in args { self.compile_expr(a, false); }
1081                let fn_id = self.function_names[name];
1082                if tail {
1083                    self.emit(Op::TailCall { fn_id, arity: args.len() as u16, node_id_idx });
1084                } else {
1085                    self.emit(Op::Call { fn_id, arity: args.len() as u16, node_id_idx });
1086                }
1087            }
1088            a::CExpr::Var { name } if self.locals.contains_key(name) => {
1089                // First-class function value bound to a local. Push the
1090                // closure, then args, then CallClosure.
1091                let slot = self.locals[name];
1092                self.emit(Op::LoadLocal(slot));
1093                for a in args { self.compile_expr(a, false); }
1094                self.emit(Op::CallClosure { arity: args.len() as u16, node_id_idx });
1095            }
1096            // Lambda directly applied — push closure + args + CallClosure.
1097            other => {
1098                self.compile_expr(other, false);
1099                for a in args { self.compile_expr(a, false); }
1100                self.emit(Op::CallClosure { arity: args.len() as u16, node_id_idx });
1101            }
1102        }
1103    }
1104
1105    fn compile_binop(&mut self, op: &str, lhs: &a::CExpr, rhs: &a::CExpr) {
1106        // #461 typed lowering: if we can statically prove both
1107        // operands are the same numeric type, emit the typed
1108        // primitive (`IntAdd` / `FloatAdd`) instead of the
1109        // polymorphic `NumAdd` that runtime-matches on operand
1110        // shape. The fast path skips one match per arithmetic op
1111        // *and* unblocks downstream peephole fusions (slice 1)
1112        // that scan for typed primitives. Conservative fallback
1113        // to the polymorphic op when either side classifies as
1114        // `Unknown`, so correctness for `Float` / mixed code is
1115        // unchanged.
1116        let lhs_ty = self.classify_expr(lhs);
1117        let rhs_ty = self.classify_expr(rhs);
1118        let typed = match (lhs_ty, rhs_ty) {
1119            (NumTy::Int, NumTy::Int) => NumTy::Int,
1120            (NumTy::Float, NumTy::Float) => NumTy::Float,
1121            _ => NumTy::Unknown,
1122        };
1123        self.compile_expr(lhs, false);
1124        self.compile_expr(rhs, false);
1125        match (op, typed) {
1126            ("+",  NumTy::Int)     => self.emit(Op::IntAdd),
1127            ("+",  NumTy::Float)   => self.emit(Op::FloatAdd),
1128            ("+",  NumTy::Unknown) => self.emit(Op::NumAdd),
1129            ("-",  NumTy::Int)     => self.emit(Op::IntSub),
1130            ("-",  NumTy::Float)   => self.emit(Op::FloatSub),
1131            ("-",  NumTy::Unknown) => self.emit(Op::NumSub),
1132            ("*",  NumTy::Int)     => self.emit(Op::IntMul),
1133            ("*",  NumTy::Float)   => self.emit(Op::FloatMul),
1134            ("*",  NumTy::Unknown) => self.emit(Op::NumMul),
1135            ("/",  NumTy::Int)     => self.emit(Op::IntDiv),
1136            ("/",  NumTy::Float)   => self.emit(Op::FloatDiv),
1137            ("/",  NumTy::Unknown) => self.emit(Op::NumDiv),
1138            // Int has %; Float doesn't (NumMod will reject at runtime).
1139            ("%",  NumTy::Int)     => self.emit(Op::IntMod),
1140            ("%",  _)              => self.emit(Op::NumMod),
1141            ("==", NumTy::Int)     => self.emit(Op::IntEq),
1142            ("==", NumTy::Float)   => self.emit(Op::FloatEq),
1143            ("==", NumTy::Unknown) => self.emit(Op::NumEq),
1144            ("!=", NumTy::Int)     => { self.emit(Op::IntEq);   self.emit(Op::BoolNot); }
1145            ("!=", NumTy::Float)   => { self.emit(Op::FloatEq); self.emit(Op::BoolNot); }
1146            ("!=", NumTy::Unknown) => { self.emit(Op::NumEq);   self.emit(Op::BoolNot); }
1147            ("<",  NumTy::Int)     => self.emit(Op::IntLt),
1148            ("<",  NumTy::Float)   => self.emit(Op::FloatLt),
1149            ("<",  NumTy::Unknown) => self.emit(Op::NumLt),
1150            ("<=", NumTy::Int)     => self.emit(Op::IntLe),
1151            ("<=", NumTy::Float)   => self.emit(Op::FloatLe),
1152            ("<=", NumTy::Unknown) => self.emit(Op::NumLe),
1153            (">",  NumTy::Int)     => { self.emit_swap_top2(); self.emit(Op::IntLt); }
1154            (">",  NumTy::Float)   => { self.emit_swap_top2(); self.emit(Op::FloatLt); }
1155            (">",  NumTy::Unknown) => { self.emit_swap_top2(); self.emit(Op::NumLt); }
1156            (">=", NumTy::Int)     => { self.emit_swap_top2(); self.emit(Op::IntLe); }
1157            (">=", NumTy::Float)   => { self.emit_swap_top2(); self.emit(Op::FloatLe); }
1158            (">=", NumTy::Unknown) => { self.emit_swap_top2(); self.emit(Op::NumLe); }
1159            ("and", _) => self.emit(Op::BoolAnd),
1160            ("or",  _) => self.emit(Op::BoolOr),
1161            (other, _) => panic!("unknown binop: {other:?}"),
1162        }
1163    }
1164
1165    /// Classify an expression's static numeric type for #461 typed
1166    /// lowering. Strictly conservative: only returns `Int` / `Float`
1167    /// when the type is locally derivable from a literal, an
1168    /// already-classified local, or a binary op on two same-typed
1169    /// operands. Everything else (function calls, field access,
1170    /// match expressions, ...) falls back to `Unknown` and the
1171    /// polymorphic NumAdd-family op.
1172    fn classify_expr(&self, e: &a::CExpr) -> NumTy {
1173        match e {
1174            a::CExpr::Literal { value: a::CLit::Int { .. } } => NumTy::Int,
1175            a::CExpr::Literal { value: a::CLit::Float { .. } } => NumTy::Float,
1176            a::CExpr::Var { name } =>
1177                self.local_types.get(name).copied().unwrap_or(NumTy::Unknown),
1178            a::CExpr::BinOp { op, lhs, rhs } => {
1179                // Numeric ops preserve the operand type (Int+Int=Int,
1180                // Float+Float=Float). Comparison/logical ops yield
1181                // Bool, not a numeric type — return Unknown.
1182                let is_numeric = matches!(op.as_str(), "+" | "-" | "*" | "/" | "%");
1183                if !is_numeric { return NumTy::Unknown; }
1184                match (self.classify_expr(lhs), self.classify_expr(rhs)) {
1185                    (NumTy::Int, NumTy::Int) => NumTy::Int,
1186                    (NumTy::Float, NumTy::Float) => NumTy::Float,
1187                    _ => NumTy::Unknown,
1188                }
1189            }
1190            a::CExpr::UnaryOp { op, expr } if op == "-" => self.classify_expr(expr),
1191            // #461 slice 7: `r.field` access where `r` is a local
1192            // bound from a record literal. Reads the per-local
1193            // field-type map populated at the let-binding site.
1194            // Unknown otherwise (record argument with `:: R`
1195            // annotation, helper-returned record, etc.) — those
1196            // would need type-alias resolution to classify.
1197            a::CExpr::FieldAccess { value, field } => {
1198                if let a::CExpr::Var { name } = value.as_ref() {
1199                    if let Some(ftypes) = self.local_record_field_types.get(name) {
1200                        return ftypes.get(field).copied().unwrap_or(NumTy::Unknown);
1201                    }
1202                }
1203                NumTy::Unknown
1204            }
1205            // Let-expressions: the let-binding mutates `local_types`
1206            // *during* compile_expr; classifying ahead of time would
1207            // require simulating that. Conservative fallback.
1208            _ => NumTy::Unknown,
1209        }
1210    }
1211
1212    fn emit_swap_top2(&mut self) {
1213        let a = self.alloc_local("__swap_a");
1214        let b = self.alloc_local("__swap_b");
1215        self.emit(Op::StoreLocal(b));
1216        self.emit(Op::StoreLocal(a));
1217        self.emit(Op::LoadLocal(b));
1218        self.emit(Op::LoadLocal(a));
1219    }
1220
1221    fn compile_match(&mut self, scrutinee: &a::CExpr, arms: &[a::Arm], tail: bool) {
1222        self.compile_expr(scrutinee, false);
1223        let scrut_slot = self.alloc_local("__scrut");
1224        self.emit(Op::StoreLocal(scrut_slot));
1225
1226        let mut end_jumps: Vec<usize> = Vec::new();
1227        for arm in arms {
1228            let arm_start_locals = self.next_local;
1229            let arm_start_locals_map = self.locals.clone();
1230
1231            self.emit(Op::LoadLocal(scrut_slot));
1232            let mut bindings: Vec<(String, u16)> = Vec::new();
1233            let fail_jumps: Vec<usize> = self.compile_pattern_test(&arm.pattern, &mut bindings);
1234
1235            self.compile_expr(&arm.body, tail);
1236            let j_end = self.code.len();
1237            self.emit(Op::Jump(0));
1238            end_jumps.push(j_end);
1239
1240            let fail_target = self.code.len() as i32;
1241            for j in fail_jumps {
1242                // #337: PConstructor patterns now register an
1243                // unconditional `Op::Jump` for the failure path
1244                // (alongside the existing `Op::JumpIfNot` from
1245                // PLiteral / nested constructor tests). Patch
1246                // either shape.
1247                match &mut self.code[j] {
1248                    Op::JumpIfNot(off) => *off = fail_target - (j as i32 + 1),
1249                    Op::Jump(off)      => *off = fail_target - (j as i32 + 1),
1250                    _ => {}
1251                }
1252            }
1253            self.next_local = arm_start_locals;
1254            self.locals = arm_start_locals_map;
1255        }
1256        let panic_msg_idx = self.pool.str("non-exhaustive match");
1257        self.emit(Op::Panic(panic_msg_idx));
1258
1259        let end_target = self.code.len() as i32;
1260        for j in end_jumps {
1261            if let Op::Jump(off) = &mut self.code[j] {
1262                *off = end_target - (j as i32 + 1);
1263            }
1264        }
1265    }
1266
1267    fn compile_pattern_test(&mut self, p: &a::Pattern, bindings: &mut Vec<(String, u16)>) -> Vec<usize> {
1268        let mut fails = Vec::new();
1269        match p {
1270            a::Pattern::PWild => { self.emit(Op::Pop); }
1271            a::Pattern::PVar { name } => {
1272                let slot = self.alloc_local(name);
1273                self.emit(Op::StoreLocal(slot));
1274                bindings.push((name.clone(), slot));
1275            }
1276            a::Pattern::PLiteral { value } => {
1277                self.compile_lit(value);
1278                match value {
1279                    a::CLit::Str { .. } => self.emit(Op::StrEq),
1280                    a::CLit::Bytes { .. } => self.emit(Op::BytesEq),
1281                    // Typed-lowering for numeric literal patterns
1282                    // (#461 slice 5 prerequisite). The pattern only
1283                    // reaches its test when the scrutinee has the
1284                    // literal's type (the type checker rejects
1285                    // mismatches), so emit the type-specific Eq.
1286                    // The body_hash decoder lowers IntEq/FloatEq to
1287                    // NumEq at hash time so closure identity (#222)
1288                    // is unchanged. Enables slice 5's
1289                    // `LoadLocal + PushConst + IntEq + JumpIfNot`
1290                    // peephole to fire on pattern-match arm tests.
1291                    a::CLit::Int { .. } => self.emit(Op::IntEq),
1292                    a::CLit::Float { .. } => self.emit(Op::FloatEq),
1293                    _ => self.emit(Op::NumEq),
1294                }
1295                let j = self.code.len();
1296                self.emit(Op::JumpIfNot(0));
1297                fails.push(j);
1298            }
1299            a::Pattern::PConstructor { name, args } => {
1300                let name_idx = self.pool.variant(name);
1301                // #337: the failure path must drop the duplicated
1302                // scrutinee so subsequent match arms see a clean
1303                // stack. The previous shape
1304                //   Dup; TestVariant; JumpIfNot(fail);
1305                // left `[scrut]` on the stack at the fail target,
1306                // poisoning later arms — e.g. a wildcard `_` arm
1307                // whose body referenced an unrelated value would
1308                // pop the leaked scrutinee instead of its own value.
1309                //
1310                // New shape: branch on success, fall through to a
1311                // failure cleanup that pops the dup'd scrutinee
1312                // before jumping. The registered fail-jump is an
1313                // unconditional `Op::Jump`; `compile_match`'s patch
1314                // loop accepts both `JumpIfNot` and `Jump`.
1315                self.emit(Op::Dup);                   // [scrut, scrut]
1316                self.emit(Op::TestVariant(name_idx)); // [scrut, Bool]
1317                let j_success = self.code.len();
1318                self.emit(Op::JumpIf(0));             // pop Bool. success → [scrut]
1319                self.emit(Op::Pop);                   // failure cleanup: [scrut] → []
1320                let j_fail = self.code.len();
1321                self.emit(Op::Jump(0));               // → fail target with []
1322                fails.push(j_fail);
1323                let success_target = self.code.len() as i32;
1324                if let Op::JumpIf(off) = &mut self.code[j_success] {
1325                    *off = success_target - (j_success as i32 + 1);
1326                }
1327                if args.is_empty() {
1328                    self.emit(Op::Pop);
1329                } else if args.len() == 1 {
1330                    self.emit(Op::GetVariantArg(0));
1331                    let sub_fails = self.compile_pattern_test(&args[0], bindings);
1332                    fails.extend(sub_fails);
1333                } else {
1334                    let slot = self.alloc_local("__variant");
1335                    self.emit(Op::StoreLocal(slot));
1336                    for (i, arg) in args.iter().enumerate() {
1337                        self.emit(Op::LoadLocal(slot));
1338                        self.emit(Op::GetVariantArg(i as u16));
1339                        let sub_fails = self.compile_pattern_test(arg, bindings);
1340                        fails.extend(sub_fails);
1341                    }
1342                }
1343            }
1344            a::Pattern::PRecord { fields } => {
1345                let slot = self.alloc_local("__record");
1346                self.emit(Op::StoreLocal(slot));
1347                for f in fields {
1348                    self.emit(Op::LoadLocal(slot));
1349                    let name_idx = self.pool.field(&f.name);
1350                    let site_idx = self.field_get_sites;
1351                    self.field_get_sites += 1;
1352                    self.emit(Op::GetField { name_idx, site_idx });
1353                    let sub_fails = self.compile_pattern_test(&f.pattern, bindings);
1354                    fails.extend(sub_fails);
1355                }
1356            }
1357            a::Pattern::PTuple { items } => {
1358                let slot = self.alloc_local("__tuple");
1359                self.emit(Op::StoreLocal(slot));
1360                for (i, item) in items.iter().enumerate() {
1361                    self.emit(Op::LoadLocal(slot));
1362                    self.emit(Op::GetElem(i as u16));
1363                    let sub_fails = self.compile_pattern_test(item, bindings);
1364                    fails.extend(sub_fails);
1365                }
1366            }
1367        }
1368        fails
1369    }
1370
1371    /// Compile a Lambda: collect free variables that resolve to outer-scope
1372    /// locals, register a synthetic function, emit MakeClosure with the
1373    /// captured values pushed in order.
1374    fn compile_lambda(&mut self, params: &[a::Param], body: &a::CExpr) {
1375        // Free vars = vars referenced in body that aren't bound locally.
1376        let mut bound: std::collections::HashSet<String> = params.iter().map(|p| p.name.clone()).collect();
1377        let mut frees: Vec<String> = Vec::new();
1378        free_vars(body, &mut bound, &mut frees);
1379
1380        // Filter to those that are in the enclosing locals (captures).
1381        // Don't exclude names that *also* exist in `function_names`:
1382        // if the name is in `locals`, the local shadows the global
1383        // within this scope, and the lambda needs to capture the
1384        // local's value, not the global fn. (#339) Names that are
1385        // ONLY in `function_names` (no local) stay external — the
1386        // lambda's body resolves them at call time, same as the
1387        // enclosing fn would.
1388        let captures: Vec<String> = frees.into_iter()
1389            .filter(|n| self.locals.contains_key(n))
1390            .collect();
1391
1392        // Allocate a fresh fn_id by appending a placeholder Function.
1393        let fn_id = self.next_fn_id.len() as u32;
1394        self.next_fn_id.push(Function {
1395            name: format!("__lambda_{fn_id}"),
1396            arity: (captures.len() + params.len()) as u16,
1397            locals_count: 0,
1398            code: Vec::new(),
1399            effects: Vec::new(),
1400            // See #222: filled in at the end of the compile pass.
1401            body_hash: crate::program::ZERO_BODY_HASH,
1402            // Lambdas don't carry refinements at the surface today
1403            // (closure params don't accept `Type{x | ...}` syntax in
1404            // the parser). #209 stays focused on top-level fn decls;
1405            // closure-param refinements are a follow-up.
1406            refinements: Vec::new(),
1407            // Lambda body hasn't been compiled yet; filled in by the
1408            // deferred lambda-compile pass after FnCompiler walks it.
1409            field_ic_sites: 0,
1410        });
1411
1412        // Emit code at the lambda site: load each captured local, then MakeClosure.
1413        for c in &captures {
1414            let slot = *self.locals.get(c).expect("free var must be in scope");
1415            self.emit(Op::LoadLocal(slot));
1416        }
1417        self.emit(Op::MakeClosure { fn_id, capture_count: captures.len() as u16 });
1418
1419        // Queue the body for later compilation.
1420        self.pending_lambdas.push(PendingLambda {
1421            fn_id,
1422            capture_names: captures,
1423            params: params.to_vec(),
1424            body: body.clone(),
1425        });
1426    }
1427
1428    /// Higher-order stdlib ops on Result/Option whose function arg is a
1429    /// closure. Emit inline: pattern-match on the variant, invoke the
1430    /// closure when applicable, return wrapped result.
1431    fn try_emit_higher_order(
1432        &mut self,
1433        module: &str,
1434        op: &str,
1435        args: &[a::CExpr],
1436        node_id_idx: u32,
1437    ) -> bool {
1438        match (module, op) {
1439            ("result", "map") => self.emit_variant_map(args, "Ok", true),
1440            ("result", "and_then") => self.emit_variant_map(args, "Ok", false),
1441            ("result", "map_err") => self.emit_variant_map(args, "Err", true),
1442            ("result", "or_else") => self.emit_variant_or_else(args, "Err", 1),
1443            ("option", "map") => self.emit_variant_map(args, "Some", true),
1444            ("option", "and_then") => self.emit_variant_map(args, "Some", false),
1445            ("option", "or_else") => self.emit_variant_or_else(args, "None", 0),
1446            ("option", "unwrap_or_else") => self.emit_option_unwrap_or_else(args),
1447            ("list", "map") => self.emit_list_map(args),
1448            ("list", "par_map") => self.emit_list_par_map(args),
1449            ("list", "sort_by") => self.emit_list_sort_by(args),
1450            ("list", "filter") => self.emit_list_filter(args),
1451            ("list", "fold") => self.emit_list_fold(args),
1452            ("iter", "from_list") => self.emit_iter_from_list(args),
1453            ("iter", "unfold")    => self.emit_iter_unfold(args),
1454            ("iter", "next")      => self.emit_iter_next(args),
1455            ("iter", "is_empty")  => self.emit_iter_is_empty(args),
1456            ("iter", "count")     => self.emit_iter_count(args),
1457            ("iter", "take")      => self.emit_iter_take(args),
1458            ("iter", "skip")      => self.emit_iter_skip(args),
1459            ("iter", "to_list")   => self.emit_iter_to_list(args),
1460            ("iter", "collect")   => self.emit_iter_to_list(args),
1461            ("iter", "map")       => self.emit_iter_map(args),
1462            ("iter", "filter")    => self.emit_iter_filter(args),
1463            ("iter", "fold")      => self.emit_iter_fold(args),
1464            ("map", "fold") => self.emit_map_fold(args, node_id_idx),
1465            ("flow", "sequential") => self.emit_flow_sequential(args),
1466            ("flow", "branch") => self.emit_flow_branch(args),
1467            ("flow", "retry") => self.emit_flow_retry(args),
1468            ("flow", "retry_with_backoff") => self.emit_flow_retry_with_backoff(args),
1469            ("flow", "parallel") => self.emit_flow_parallel(args),
1470            ("flow", "parallel_list") => self.emit_flow_parallel_list(args),
1471            _ => return false,
1472        }
1473        true
1474    }
1475
1476    /// `list.map(xs, f)` — native map op (#464). Pushes `xs` then `f`
1477    /// and emits a single `Op::ListMap`. The previous inlined loop
1478    /// re-`LoadLocal`'d (cloned) the whole input and accumulator lists
1479    /// each iteration — O(n²); the native op owns the list and builds
1480    /// the result with one pre-sized allocation.
1481    fn emit_list_map(&mut self, args: &[a::CExpr]) {
1482        self.compile_expr(&args[0], false); // xs
1483        self.compile_expr(&args[1], false); // f
1484        let nid = self.pool.node_id("n_list_map");
1485        self.emit(Op::ListMap { node_id_idx: nid });
1486    }
1487
1488    /// `list.par_map(xs, f)` (#305 slice 1). Pushes `xs` and `f`,
1489    /// then emits a single `Op::ParallelMap` — the VM applies `f`
1490    /// to each element on OS-thread tasks, capped by
1491    /// `LEX_PAR_MAX_CONCURRENCY`. Returns the result list in input
1492    /// order.
1493    fn emit_list_par_map(&mut self, args: &[a::CExpr]) {
1494        self.compile_expr(&args[0], false);
1495        self.compile_expr(&args[1], false);
1496        let nid = self.pool.node_id("n_list_par_map");
1497        self.emit(Op::ParallelMap { node_id_idx: nid });
1498    }
1499
1500    /// `list.sort_by(xs, f)` (#338). Pushes `xs` and the key-fn
1501    /// `f`, then emits a single `Op::SortByKey` — the VM invokes
1502    /// `f` on each element to derive a sortable key, stable-sorts
1503    /// by key, and returns the values in sorted order. Keys must
1504    /// resolve to `Int` / `Float` / `Str`; mixed-type pairs are
1505    /// treated as equal by the comparator (preserving insertion
1506    /// order via the stable sort).
1507    fn emit_list_sort_by(&mut self, args: &[a::CExpr]) {
1508        self.compile_expr(&args[0], false);
1509        self.compile_expr(&args[1], false);
1510        let nid = self.pool.node_id("n_list_sort_by");
1511        self.emit(Op::SortByKey { node_id_idx: nid });
1512    }
1513
1514    /// `list.filter(xs, pred)` — native filter op (#464). Same
1515    /// rationale as `emit_list_map`.
1516    fn emit_list_filter(&mut self, args: &[a::CExpr]) {
1517        self.compile_expr(&args[0], false); // xs
1518        self.compile_expr(&args[1], false); // pred
1519        let nid = self.pool.node_id("n_list_filter");
1520        self.emit(Op::ListFilter { node_id_idx: nid });
1521    }
1522
1523    /// `list.fold(xs, init, f)` — native left-fold op (#464). Same
1524    /// rationale as `emit_list_map`. Stack: `[xs, init, f]`.
1525    fn emit_list_fold(&mut self, args: &[a::CExpr]) {
1526        self.compile_expr(&args[0], false); // xs
1527        self.compile_expr(&args[1], false); // init
1528        self.compile_expr(&args[2], false); // f
1529        let nid = self.pool.node_id("n_list_fold");
1530        self.emit(Op::ListFold { node_id_idx: nid });
1531    }
1532
1533    // ── Iter[T] operations (#364) ─────────────────────────────────────────
1534    // Internal representation: `Value::Variant("__IterEager", [list, idx])`
1535    // for the eager form (a List backing store + Int cursor) and
1536    // `Value::Variant("__IterLazy", [seed, step_closure])` for the lazy form
1537    // produced by `iter.unfold` (#376). Both are tagged variants so each op
1538    // can `TestVariant` at runtime to dispatch. The names start with `__` so
1539    // they can't be written by user code (uppercase ASCII-letter is required
1540    // for constructor names, and the underscores keep them out of the
1541    // user-namespace by convention).
1542
1543    /// `iter.from_list(xs)` — wrap a list in an eager iterator at position 0.
1544    fn emit_iter_from_list(&mut self, args: &[a::CExpr]) {
1545        self.compile_expr(&args[0], false);
1546        let zero = self.pool.int(0);
1547        self.emit(Op::PushConst(zero));
1548        let v = self.pool.variant("__IterEager");
1549        self.emit(Op::MakeVariant { name_idx: v, arity: 2 });
1550    }
1551
1552    /// `iter.next(it)` — advance one step; returns `Option[(T, Iter[T])]`.
1553    ///
1554    /// Dispatches on the iter's variant tag:
1555    /// - `__IterLazy(seed, step)` (#376) → invoke `step(seed)`. On
1556    ///   `Some((t, s'))` wrap as `Some((t, __IterLazy(s', step)))`; on
1557    ///   `None` propagate `None`. The seed advances forward each call.
1558    /// - `__IterCursor(handle)` (#379) → effect-call `sql.cursor_next(handle)`
1559    ///   which returns `Option[T]`. On `Some(row)` wrap as
1560    ///   `Some((row, __IterCursor(handle)))`; on `None` propagate. Handle
1561    ///   stays stable across calls — state is server-side / mpsc-buffered.
1562    /// - `__IterEager(list, idx)` → existing positional cursor.
1563    fn emit_iter_next(&mut self, args: &[a::CExpr]) {
1564        self.compile_expr(&args[0], false);
1565        let it = self.alloc_local("__in_it");
1566        self.emit(Op::StoreLocal(it));
1567
1568        // Dispatch: TestVariant pops; we Dup to keep the iter around.
1569        self.emit(Op::LoadLocal(it));
1570        self.emit(Op::Dup);
1571        let lazy_name = self.pool.variant("__IterLazy");
1572        self.emit(Op::TestVariant(lazy_name));
1573        let j_to_check_cursor = self.code.len();
1574        self.emit(Op::JumpIfNot(0));
1575
1576        // ── lazy path ────────────────────────────────────────────────
1577        // The Dup'd iter is on stack but we've consumed it via TestVariant,
1578        // so reload from the local.
1579        self.emit(Op::LoadLocal(it));
1580        self.emit(Op::GetVariantArg(0)); // seed
1581        let seed = self.alloc_local("__in_seed");
1582        self.emit(Op::StoreLocal(seed));
1583
1584        self.emit(Op::LoadLocal(it));
1585        self.emit(Op::GetVariantArg(1)); // step closure
1586        let step = self.alloc_local("__in_step");
1587        self.emit(Op::StoreLocal(step));
1588
1589        // Call step(seed) → Option[(T, S)].
1590        let nid_lazy = self.pool.node_id("n_iter_next_lazy");
1591        self.emit(Op::LoadLocal(step));
1592        self.emit(Op::LoadLocal(seed));
1593        self.emit(Op::CallClosure { arity: 1, node_id_idx: nid_lazy });
1594        let opt = self.alloc_local("__in_opt");
1595        self.emit(Op::StoreLocal(opt));
1596
1597        // If `step` returned None, propagate it directly.
1598        self.emit(Op::LoadLocal(opt));
1599        let some_name = self.pool.variant("Some");
1600        self.emit(Op::TestVariant(some_name));
1601        let j_lazy_none = self.code.len();
1602        self.emit(Op::JumpIfNot(0));
1603
1604        // Some((t, new_seed)) — extract the inner tuple, repackage as
1605        // Some((t, __IterLazy(new_seed, step))) so the next call advances.
1606        self.emit(Op::LoadLocal(opt));
1607        self.emit(Op::GetVariantArg(0));     // (t, new_seed)
1608        let pair = self.alloc_local("__in_pair");
1609        self.emit(Op::StoreLocal(pair));
1610
1611        self.emit(Op::LoadLocal(pair));
1612        self.emit(Op::GetElem(0));           // t
1613        self.emit(Op::LoadLocal(pair));
1614        self.emit(Op::GetElem(1));           // new_seed
1615        self.emit(Op::LoadLocal(step));      // step closure
1616        let lazy_v = self.pool.variant("__IterLazy");
1617        self.emit(Op::MakeVariant { name_idx: lazy_v, arity: 2 }); // __IterLazy(new_seed, step)
1618        self.emit(Op::MakeTuple(2));         // (t, new_iter)
1619        let some_v = self.pool.variant("Some");
1620        self.emit(Op::MakeVariant { name_idx: some_v, arity: 1 });
1621        let j_after_lazy = self.code.len();
1622        self.emit(Op::Jump(0));
1623
1624        // Lazy → None: just forward the None.
1625        let none_t = self.code.len() as i32;
1626        if let Op::JumpIfNot(off) = &mut self.code[j_lazy_none] {
1627            *off = none_t - (j_lazy_none as i32 + 1);
1628        }
1629        let none_v = self.pool.variant("None");
1630        self.emit(Op::MakeVariant { name_idx: none_v, arity: 0 });
1631        let j_after_lazy_none = self.code.len();
1632        self.emit(Op::Jump(0));
1633
1634        // ── cursor path (#379) ───────────────────────────────────────
1635        let cursor_check_t = self.code.len() as i32;
1636        if let Op::JumpIfNot(off) = &mut self.code[j_to_check_cursor] {
1637            *off = cursor_check_t - (j_to_check_cursor as i32 + 1);
1638        }
1639
1640        self.emit(Op::LoadLocal(it));
1641        self.emit(Op::Dup);
1642        let cursor_name = self.pool.variant("__IterCursor");
1643        self.emit(Op::TestVariant(cursor_name));
1644        let j_to_eager = self.code.len();
1645        self.emit(Op::JumpIfNot(0));
1646
1647        // Cursor path: extract handle, effect-call sql.cursor_next(handle).
1648        // The handler returns Option[T] directly. We then wrap as
1649        // Some((T, __IterCursor(handle))) or forward None.
1650        self.emit(Op::LoadLocal(it));
1651        self.emit(Op::GetVariantArg(0));     // handle
1652        let handle = self.alloc_local("__in_handle");
1653        self.emit(Op::StoreLocal(handle));
1654
1655        let kind_idx = self.pool.str("sql");
1656        let op_idx = self.pool.str("cursor_next");
1657        let nid_cursor = self.pool.node_id("n_iter_next_cursor");
1658        self.emit(Op::LoadLocal(handle));
1659        self.emit(Op::EffectCall {
1660            kind_idx,
1661            op_idx,
1662            arity: 1,
1663            node_id_idx: nid_cursor,
1664        });
1665        let cur_opt = self.alloc_local("__in_cur_opt");
1666        self.emit(Op::StoreLocal(cur_opt));
1667
1668        self.emit(Op::LoadLocal(cur_opt));
1669        let some_c = self.pool.variant("Some");
1670        self.emit(Op::TestVariant(some_c));
1671        let j_cursor_none = self.code.len();
1672        self.emit(Op::JumpIfNot(0));
1673
1674        // Some(row): build Some((row, __IterCursor(handle)))
1675        self.emit(Op::LoadLocal(cur_opt));
1676        self.emit(Op::GetVariantArg(0));     // row
1677        self.emit(Op::LoadLocal(handle));
1678        let cursor_v = self.pool.variant("__IterCursor");
1679        self.emit(Op::MakeVariant { name_idx: cursor_v, arity: 1 });
1680        self.emit(Op::MakeTuple(2));         // (row, __IterCursor(handle))
1681        let some_c2 = self.pool.variant("Some");
1682        self.emit(Op::MakeVariant { name_idx: some_c2, arity: 1 });
1683        let j_after_cursor = self.code.len();
1684        self.emit(Op::Jump(0));
1685
1686        // Cursor → None
1687        let cursor_none_t = self.code.len() as i32;
1688        if let Op::JumpIfNot(off) = &mut self.code[j_cursor_none] {
1689            *off = cursor_none_t - (j_cursor_none as i32 + 1);
1690        }
1691        let none_c = self.pool.variant("None");
1692        self.emit(Op::MakeVariant { name_idx: none_c, arity: 0 });
1693        let j_after_cursor_none = self.code.len();
1694        self.emit(Op::Jump(0));
1695
1696        // ── eager path ───────────────────────────────────────────────
1697        let eager_t = self.code.len() as i32;
1698        if let Op::JumpIfNot(off) = &mut self.code[j_to_eager] {
1699            *off = eager_t - (j_to_eager as i32 + 1);
1700        }
1701
1702        self.emit(Op::LoadLocal(it));
1703        self.emit(Op::GetVariantArg(0));
1704        let list = self.alloc_local("__in_list");
1705        self.emit(Op::StoreLocal(list));
1706
1707        self.emit(Op::LoadLocal(it));
1708        self.emit(Op::GetVariantArg(1));
1709        let idx = self.alloc_local("__in_idx");
1710        self.emit(Op::StoreLocal(idx));
1711
1712        // if idx < len(list)
1713        self.emit(Op::LoadLocal(idx));
1714        self.emit(Op::LoadLocal(list));
1715        self.emit(Op::GetListLen);
1716        self.emit(Op::IntLt);
1717        let j_eager_else = self.code.len();
1718        self.emit(Op::JumpIfNot(0));
1719
1720        // Some((item, __IterEager(list, idx+1)))
1721        self.emit(Op::LoadLocal(list));
1722        self.emit(Op::LoadLocal(idx));
1723        self.emit(Op::GetListElemDyn);
1724
1725        self.emit(Op::LoadLocal(list));
1726        self.emit(Op::LoadLocal(idx));
1727        let one = self.pool.int(1);
1728        self.emit(Op::PushConst(one));
1729        self.emit(Op::IntAdd);
1730        let eager_v = self.pool.variant("__IterEager");
1731        self.emit(Op::MakeVariant { name_idx: eager_v, arity: 2 });
1732        self.emit(Op::MakeTuple(2));
1733        let some_e = self.pool.variant("Some");
1734        self.emit(Op::MakeVariant { name_idx: some_e, arity: 1 });
1735        let j_after_eager = self.code.len();
1736        self.emit(Op::Jump(0));
1737
1738        // Eager → None
1739        let eager_none_t = self.code.len() as i32;
1740        if let Op::JumpIfNot(off) = &mut self.code[j_eager_else] {
1741            *off = eager_none_t - (j_eager_else as i32 + 1);
1742        }
1743        let none_e = self.pool.variant("None");
1744        self.emit(Op::MakeVariant { name_idx: none_e, arity: 0 });
1745
1746        // Converge all paths.
1747        let end = self.code.len() as i32;
1748        if let Op::Jump(off) = &mut self.code[j_after_lazy] {
1749            *off = end - (j_after_lazy as i32 + 1);
1750        }
1751        if let Op::Jump(off) = &mut self.code[j_after_lazy_none] {
1752            *off = end - (j_after_lazy_none as i32 + 1);
1753        }
1754        if let Op::Jump(off) = &mut self.code[j_after_cursor] {
1755            *off = end - (j_after_cursor as i32 + 1);
1756        }
1757        if let Op::Jump(off) = &mut self.code[j_after_cursor_none] {
1758            *off = end - (j_after_cursor_none as i32 + 1);
1759        }
1760        if let Op::Jump(off) = &mut self.code[j_after_eager] {
1761            *off = end - (j_after_eager as i32 + 1);
1762        }
1763    }
1764
1765    /// `iter.unfold(seed, step)` — lazy iterator that calls `step(seed)` on
1766    /// each `iter.next` and threads the new seed forward. Internal value
1767    /// shape: `__IterLazy(seed, step)`. Step has type `(S) -> Option[(T, S)]`;
1768    /// returning `None` ends the iteration (#376).
1769    fn emit_iter_unfold(&mut self, args: &[a::CExpr]) {
1770        self.compile_expr(&args[0], false); // seed
1771        self.compile_expr(&args[1], false); // step
1772        let lazy = self.pool.variant("__IterLazy");
1773        self.emit(Op::MakeVariant { name_idx: lazy, arity: 2 });
1774    }
1775
1776    /// `iter.is_empty(it)` — true iff no further element. v1 supports the
1777    /// eager form O(1); on a lazy iter the seed sits in slot 0 and is not a
1778    /// List, so the VM trips on `GetListLen` rather than returning a wrong
1779    /// answer. Callers needing lazy support should materialize with
1780    /// `iter.to_list` first or call `iter.next` and pattern-match.
1781    fn emit_iter_is_empty(&mut self, args: &[a::CExpr]) {
1782        self.compile_expr(&args[0], false);
1783        let it = self.alloc_local("__ie_it");
1784        self.emit(Op::StoreLocal(it));
1785
1786        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(1)); // idx
1787        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(0)); // list
1788        self.emit(Op::GetListLen);                                     // len
1789        self.emit(Op::IntLt);                                          // idx < len
1790        self.emit(Op::BoolNot);                                        // NOT(idx < len)
1791    }
1792
1793    /// `iter.count(it)` — number of remaining elements (v1: eager-only).
1794    fn emit_iter_count(&mut self, args: &[a::CExpr]) {
1795        self.compile_expr(&args[0], false);
1796        let it = self.alloc_local("__ic_it");
1797        self.emit(Op::StoreLocal(it));
1798
1799        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(0));
1800        self.emit(Op::GetListLen);                                     // len
1801        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(1)); // idx
1802        self.emit(Op::IntSub);                                         // len - idx
1803    }
1804
1805    /// `iter.take(it, n)` — collect up to n elements, return as new Iter.
1806    fn emit_iter_take(&mut self, args: &[a::CExpr]) {
1807        self.compile_expr(&args[0], false);
1808        let it   = self.alloc_local("__itk_it");
1809        self.emit(Op::StoreLocal(it));
1810
1811        self.compile_expr(&args[1], false);
1812        let n    = self.alloc_local("__itk_n");
1813        self.emit(Op::StoreLocal(n));
1814
1815        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(0));
1816        let list = self.alloc_local("__itk_list");
1817        self.emit(Op::StoreLocal(list));
1818
1819        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(1));
1820        let i    = self.alloc_local("__itk_i");
1821        self.emit(Op::StoreLocal(i));
1822
1823        self.emit(Op::MakeList(0));
1824        let out  = self.alloc_local("__itk_out");
1825        self.emit(Op::StoreLocal(out));
1826
1827        let zero = self.pool.int(0);
1828        self.emit(Op::PushConst(zero));
1829        let cnt  = self.alloc_local("__itk_cnt");
1830        self.emit(Op::StoreLocal(cnt));
1831
1832        let loop_top = self.code.len();
1833
1834        // while cnt < n
1835        self.emit(Op::LoadLocal(cnt));
1836        self.emit(Op::LoadLocal(n));
1837        self.emit(Op::IntLt);
1838        let j_exit_n = self.code.len();
1839        self.emit(Op::JumpIfNot(0));
1840
1841        // AND i < len(list)
1842        self.emit(Op::LoadLocal(i));
1843        self.emit(Op::LoadLocal(list)); self.emit(Op::GetListLen);
1844        self.emit(Op::IntLt);
1845        let j_exit_l = self.code.len();
1846        self.emit(Op::JumpIfNot(0));
1847
1848        // out = out ++ [list[i]]
1849        self.emit(Op::LoadLocal(out));
1850        self.emit(Op::LoadLocal(list));
1851        self.emit(Op::LoadLocal(i));
1852        self.emit(Op::GetListElemDyn);
1853        self.emit(Op::ListAppend);
1854        self.emit(Op::StoreLocal(out));
1855
1856        let one = self.pool.int(1);
1857        // i = i + 1
1858        self.emit(Op::LoadLocal(i));
1859        self.emit(Op::PushConst(one));
1860        self.emit(Op::IntAdd);
1861        self.emit(Op::StoreLocal(i));
1862        // cnt = cnt + 1
1863        self.emit(Op::LoadLocal(cnt));
1864        self.emit(Op::PushConst(one));
1865        self.emit(Op::IntAdd);
1866        self.emit(Op::StoreLocal(cnt));
1867
1868        let jback = self.code.len();
1869        self.emit(Op::Jump((loop_top as i32) - (jback as i32 + 1)));
1870
1871        let exit_t = self.code.len() as i32;
1872        if let Op::JumpIfNot(off) = &mut self.code[j_exit_n] { *off = exit_t - (j_exit_n as i32 + 1); }
1873        if let Op::JumpIfNot(off) = &mut self.code[j_exit_l] { *off = exit_t - (j_exit_l as i32 + 1); }
1874
1875        // return new __IterEager(out, 0)
1876        self.emit(Op::LoadLocal(out));
1877        self.emit(Op::PushConst(zero));
1878        let eager_v = self.pool.variant("__IterEager");
1879        self.emit(Op::MakeVariant { name_idx: eager_v, arity: 2 });
1880    }
1881
1882    /// `iter.skip(it, n)` — advance cursor by n (or to end), return new Iter.
1883    fn emit_iter_skip(&mut self, args: &[a::CExpr]) {
1884        self.compile_expr(&args[0], false);
1885        let it   = self.alloc_local("__isk_it");
1886        self.emit(Op::StoreLocal(it));
1887
1888        self.compile_expr(&args[1], false);
1889        let n    = self.alloc_local("__isk_n");
1890        self.emit(Op::StoreLocal(n));
1891
1892        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(0));
1893        let list = self.alloc_local("__isk_list");
1894        self.emit(Op::StoreLocal(list));
1895
1896        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(1));
1897        let idx  = self.alloc_local("__isk_idx");
1898        self.emit(Op::StoreLocal(idx));
1899
1900        // raw = idx + n
1901        self.emit(Op::LoadLocal(idx));
1902        self.emit(Op::LoadLocal(n));
1903        self.emit(Op::IntAdd);
1904        let raw  = self.alloc_local("__isk_raw");
1905        self.emit(Op::StoreLocal(raw));
1906
1907        // new_idx = if raw < len then raw else len
1908        self.emit(Op::LoadLocal(raw));
1909        self.emit(Op::LoadLocal(list)); self.emit(Op::GetListLen);
1910        self.emit(Op::IntLt);
1911        let j_use_raw = self.code.len();
1912        self.emit(Op::JumpIf(0));
1913
1914        // use len
1915        self.emit(Op::LoadLocal(list)); self.emit(Op::GetListLen);
1916        let j_end = self.code.len();
1917        self.emit(Op::Jump(0));
1918
1919        // use raw
1920        let raw_t = self.code.len() as i32;
1921        if let Op::JumpIf(off) = &mut self.code[j_use_raw] { *off = raw_t - (j_use_raw as i32 + 1); }
1922        self.emit(Op::LoadLocal(raw));
1923
1924        let end_t = self.code.len() as i32;
1925        if let Op::Jump(off) = &mut self.code[j_end] { *off = end_t - (j_end as i32 + 1); }
1926
1927        // new_idx on stack; build new __IterEager(list, new_idx)
1928        let new_idx = self.alloc_local("__isk_ni");
1929        self.emit(Op::StoreLocal(new_idx));
1930        self.emit(Op::LoadLocal(list));
1931        self.emit(Op::LoadLocal(new_idx));
1932        let eager_v = self.pool.variant("__IterEager");
1933        self.emit(Op::MakeVariant { name_idx: eager_v, arity: 2 });
1934    }
1935
1936    /// `iter.to_list(it)` — materialise remaining elements into a List.
1937    ///
1938    /// Dispatches on the iter variant (#376):
1939    /// - `__IterLazy`: repeatedly call `step(seed)`; on `Some((t, s'))` append
1940    ///   `t` and continue with `s'`; on `None` stop. May hang on truly
1941    ///   infinite producers — that's documented as a v1 limitation, the
1942    ///   step-limit-protected caller is what catches misuse.
1943    /// - `__IterEager`: slice the backing list from `idx` onward (O(n) walk).
1944    fn emit_iter_to_list(&mut self, args: &[a::CExpr]) {
1945        self.compile_expr(&args[0], false);
1946        let it = self.alloc_local("__itl_it");
1947        self.emit(Op::StoreLocal(it));
1948
1949        // Build the output list up-front, shared across both paths.
1950        self.emit(Op::MakeList(0));
1951        let out = self.alloc_local("__itl_out");
1952        self.emit(Op::StoreLocal(out));
1953
1954        // Dispatch on variant tag.
1955        self.emit(Op::LoadLocal(it));
1956        let lazy_name = self.pool.variant("__IterLazy");
1957        self.emit(Op::TestVariant(lazy_name));
1958        let j_to_eager = self.code.len();
1959        self.emit(Op::JumpIfNot(0));
1960
1961        // ── lazy path ─────────────────────────────────────────────────
1962        // seed and step closure live in locals; we update seed each iteration.
1963        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(0));
1964        let seed = self.alloc_local("__itl_seed");
1965        self.emit(Op::StoreLocal(seed));
1966
1967        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(1));
1968        let step = self.alloc_local("__itl_step");
1969        self.emit(Op::StoreLocal(step));
1970
1971        let lazy_loop = self.code.len();
1972        let nid_lazy = self.pool.node_id("n_iter_to_list_lazy");
1973        self.emit(Op::LoadLocal(step));
1974        self.emit(Op::LoadLocal(seed));
1975        self.emit(Op::CallClosure { arity: 1, node_id_idx: nid_lazy });
1976        let opt = self.alloc_local("__itl_opt");
1977        self.emit(Op::StoreLocal(opt));
1978
1979        // If None, drop out of the lazy loop.
1980        self.emit(Op::LoadLocal(opt));
1981        let some_name = self.pool.variant("Some");
1982        self.emit(Op::TestVariant(some_name));
1983        let j_lazy_exit = self.code.len();
1984        self.emit(Op::JumpIfNot(0));
1985
1986        // Some((t, new_seed)): append t to out, replace seed.
1987        self.emit(Op::LoadLocal(opt));
1988        self.emit(Op::GetVariantArg(0));
1989        let pair = self.alloc_local("__itl_pair");
1990        self.emit(Op::StoreLocal(pair));
1991
1992        self.emit(Op::LoadLocal(out));
1993        self.emit(Op::LoadLocal(pair)); self.emit(Op::GetElem(0));
1994        self.emit(Op::ListAppend);
1995        self.emit(Op::StoreLocal(out));
1996
1997        self.emit(Op::LoadLocal(pair)); self.emit(Op::GetElem(1));
1998        self.emit(Op::StoreLocal(seed));
1999
2000        let jback_lazy = self.code.len();
2001        self.emit(Op::Jump((lazy_loop as i32) - (jback_lazy as i32 + 1)));
2002
2003        let lazy_exit_t = self.code.len() as i32;
2004        if let Op::JumpIfNot(off) = &mut self.code[j_lazy_exit] {
2005            *off = lazy_exit_t - (j_lazy_exit as i32 + 1);
2006        }
2007        let j_after_lazy = self.code.len();
2008        self.emit(Op::Jump(0));
2009
2010        // ── eager path ────────────────────────────────────────────────
2011        let eager_t = self.code.len() as i32;
2012        if let Op::JumpIfNot(off) = &mut self.code[j_to_eager] {
2013            *off = eager_t - (j_to_eager as i32 + 1);
2014        }
2015
2016        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(0));
2017        let list = self.alloc_local("__itl_list");
2018        self.emit(Op::StoreLocal(list));
2019
2020        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(1));
2021        let i = self.alloc_local("__itl_i");
2022        self.emit(Op::StoreLocal(i));
2023
2024        let loop_top = self.code.len();
2025        self.emit(Op::LoadLocal(i));
2026        self.emit(Op::LoadLocal(list)); self.emit(Op::GetListLen);
2027        self.emit(Op::IntLt);
2028        let j_exit = self.code.len();
2029        self.emit(Op::JumpIfNot(0));
2030
2031        self.emit(Op::LoadLocal(out));
2032        self.emit(Op::LoadLocal(list));
2033        self.emit(Op::LoadLocal(i));
2034        self.emit(Op::GetListElemDyn);
2035        self.emit(Op::ListAppend);
2036        self.emit(Op::StoreLocal(out));
2037
2038        self.emit(Op::LoadLocal(i));
2039        let one = self.pool.int(1);
2040        self.emit(Op::PushConst(one));
2041        self.emit(Op::IntAdd);
2042        self.emit(Op::StoreLocal(i));
2043
2044        let jback = self.code.len();
2045        self.emit(Op::Jump((loop_top as i32) - (jback as i32 + 1)));
2046
2047        let exit_t = self.code.len() as i32;
2048        if let Op::JumpIfNot(off) = &mut self.code[j_exit] {
2049            *off = exit_t - (j_exit as i32 + 1);
2050        }
2051
2052        // Converge: lazy path falls through here too.
2053        let converge = self.code.len() as i32;
2054        if let Op::Jump(off) = &mut self.code[j_after_lazy] {
2055            *off = converge - (j_after_lazy as i32 + 1);
2056        }
2057        self.emit(Op::LoadLocal(out));
2058    }
2059
2060    /// `iter.map(it, f)` — apply `f` to each remaining element; returns new Iter.
2061    fn emit_iter_map(&mut self, args: &[a::CExpr]) {
2062        self.compile_expr(&args[0], false);
2063        let it   = self.alloc_local("__im_it");
2064        self.emit(Op::StoreLocal(it));
2065
2066        self.compile_expr(&args[1], false);
2067        let f    = self.alloc_local("__im_f");
2068        self.emit(Op::StoreLocal(f));
2069
2070        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(0));
2071        let list = self.alloc_local("__im_list");
2072        self.emit(Op::StoreLocal(list));
2073
2074        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(1));
2075        let i    = self.alloc_local("__im_i");
2076        self.emit(Op::StoreLocal(i));
2077
2078        self.emit(Op::MakeList(0));
2079        let out  = self.alloc_local("__im_out");
2080        self.emit(Op::StoreLocal(out));
2081
2082        let loop_top = self.code.len();
2083        self.emit(Op::LoadLocal(i));
2084        self.emit(Op::LoadLocal(list)); self.emit(Op::GetListLen);
2085        self.emit(Op::IntLt);
2086        let j_exit = self.code.len();
2087        self.emit(Op::JumpIfNot(0));
2088
2089        let nid = self.pool.node_id("n_iter_map");
2090        self.emit(Op::LoadLocal(out));
2091        self.emit(Op::LoadLocal(f));
2092        self.emit(Op::LoadLocal(list));
2093        self.emit(Op::LoadLocal(i));
2094        self.emit(Op::GetListElemDyn);
2095        self.emit(Op::CallClosure { arity: 1, node_id_idx: nid });
2096        self.emit(Op::ListAppend);
2097        self.emit(Op::StoreLocal(out));
2098
2099        self.emit(Op::LoadLocal(i));
2100        let one = self.pool.int(1);
2101        self.emit(Op::PushConst(one));
2102        self.emit(Op::IntAdd);
2103        self.emit(Op::StoreLocal(i));
2104
2105        let jback = self.code.len();
2106        self.emit(Op::Jump((loop_top as i32) - (jback as i32 + 1)));
2107
2108        let exit_t = self.code.len() as i32;
2109        if let Op::JumpIfNot(off) = &mut self.code[j_exit] { *off = exit_t - (j_exit as i32 + 1); }
2110
2111        let zero = self.pool.int(0);
2112        self.emit(Op::LoadLocal(out));
2113        self.emit(Op::PushConst(zero));
2114        let eager_v = self.pool.variant("__IterEager");
2115        self.emit(Op::MakeVariant { name_idx: eager_v, arity: 2 });
2116    }
2117
2118    /// `iter.filter(it, pred)` — keep elements where pred is true; returns new Iter.
2119    fn emit_iter_filter(&mut self, args: &[a::CExpr]) {
2120        self.compile_expr(&args[0], false);
2121        let it   = self.alloc_local("__if_it");
2122        self.emit(Op::StoreLocal(it));
2123
2124        self.compile_expr(&args[1], false);
2125        let f    = self.alloc_local("__if_f");
2126        self.emit(Op::StoreLocal(f));
2127
2128        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(0));
2129        let list = self.alloc_local("__if_list");
2130        self.emit(Op::StoreLocal(list));
2131
2132        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(1));
2133        let i    = self.alloc_local("__if_i");
2134        self.emit(Op::StoreLocal(i));
2135
2136        self.emit(Op::MakeList(0));
2137        let out  = self.alloc_local("__if_out");
2138        self.emit(Op::StoreLocal(out));
2139
2140        let loop_top = self.code.len();
2141        self.emit(Op::LoadLocal(i));
2142        self.emit(Op::LoadLocal(list)); self.emit(Op::GetListLen);
2143        self.emit(Op::IntLt);
2144        let j_exit = self.code.len();
2145        self.emit(Op::JumpIfNot(0));
2146
2147        // elem := list[i]
2148        self.emit(Op::LoadLocal(list));
2149        self.emit(Op::LoadLocal(i));
2150        self.emit(Op::GetListElemDyn);
2151        let x    = self.alloc_local("__if_x");
2152        self.emit(Op::StoreLocal(x));
2153
2154        let nid = self.pool.node_id("n_iter_filter");
2155        self.emit(Op::LoadLocal(f));
2156        self.emit(Op::LoadLocal(x));
2157        self.emit(Op::CallClosure { arity: 1, node_id_idx: nid });
2158        let j_skip = self.code.len();
2159        self.emit(Op::JumpIfNot(0));
2160
2161        self.emit(Op::LoadLocal(out));
2162        self.emit(Op::LoadLocal(x));
2163        self.emit(Op::ListAppend);
2164        self.emit(Op::StoreLocal(out));
2165
2166        let skip_t = self.code.len() as i32;
2167        if let Op::JumpIfNot(off) = &mut self.code[j_skip] { *off = skip_t - (j_skip as i32 + 1); }
2168
2169        self.emit(Op::LoadLocal(i));
2170        let one = self.pool.int(1);
2171        self.emit(Op::PushConst(one));
2172        self.emit(Op::IntAdd);
2173        self.emit(Op::StoreLocal(i));
2174
2175        let jback = self.code.len();
2176        self.emit(Op::Jump((loop_top as i32) - (jback as i32 + 1)));
2177
2178        let exit_t = self.code.len() as i32;
2179        if let Op::JumpIfNot(off) = &mut self.code[j_exit] { *off = exit_t - (j_exit as i32 + 1); }
2180
2181        let zero = self.pool.int(0);
2182        self.emit(Op::LoadLocal(out));
2183        self.emit(Op::PushConst(zero));
2184        let eager_v = self.pool.variant("__IterEager");
2185        self.emit(Op::MakeVariant { name_idx: eager_v, arity: 2 });
2186    }
2187
2188    /// `iter.fold(it, init, f)` — left fold over remaining elements.
2189    fn emit_iter_fold(&mut self, args: &[a::CExpr]) {
2190        self.compile_expr(&args[0], false);
2191        let it   = self.alloc_local("__ifo_it");
2192        self.emit(Op::StoreLocal(it));
2193
2194        self.compile_expr(&args[1], false);
2195        let acc  = self.alloc_local("__ifo_acc");
2196        self.emit(Op::StoreLocal(acc));
2197
2198        self.compile_expr(&args[2], false);
2199        let f    = self.alloc_local("__ifo_f");
2200        self.emit(Op::StoreLocal(f));
2201
2202        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(0));
2203        let list = self.alloc_local("__ifo_list");
2204        self.emit(Op::StoreLocal(list));
2205
2206        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(1));
2207        let i    = self.alloc_local("__ifo_i");
2208        self.emit(Op::StoreLocal(i));
2209
2210        let loop_top = self.code.len();
2211        self.emit(Op::LoadLocal(i));
2212        self.emit(Op::LoadLocal(list)); self.emit(Op::GetListLen);
2213        self.emit(Op::IntLt);
2214        let j_exit = self.code.len();
2215        self.emit(Op::JumpIfNot(0));
2216
2217        let nid = self.pool.node_id("n_iter_fold");
2218        self.emit(Op::LoadLocal(f));
2219        self.emit(Op::LoadLocal(acc));
2220        self.emit(Op::LoadLocal(list));
2221        self.emit(Op::LoadLocal(i));
2222        self.emit(Op::GetListElemDyn);
2223        self.emit(Op::CallClosure { arity: 2, node_id_idx: nid });
2224        self.emit(Op::StoreLocal(acc));
2225
2226        self.emit(Op::LoadLocal(i));
2227        let one = self.pool.int(1);
2228        self.emit(Op::PushConst(one));
2229        self.emit(Op::IntAdd);
2230        self.emit(Op::StoreLocal(i));
2231
2232        let jback = self.code.len();
2233        self.emit(Op::Jump((loop_top as i32) - (jback as i32 + 1)));
2234
2235        let exit_t = self.code.len() as i32;
2236        if let Op::JumpIfNot(off) = &mut self.code[j_exit] { *off = exit_t - (j_exit as i32 + 1); }
2237        self.emit(Op::LoadLocal(acc));
2238    }
2239
2240    /// `map.fold(m, init, f)` — left fold over `Map[K, V]` entries with a
2241    /// three-arg combiner `f(acc, k, v)`. Iteration order matches
2242    /// `map.entries` (BTreeMap-sorted by key). Materializes the entry
2243    /// list once via the runtime's `("map", "entries")` op, then runs
2244    /// the same inline loop as `list.fold`.
2245    fn emit_map_fold(&mut self, args: &[a::CExpr], node_id_idx: u32) {
2246        // xs := map.entries(m)
2247        self.compile_expr(&args[0], false);
2248        let map_kind = self.pool.str("map");
2249        let entries_op = self.pool.str("entries");
2250        self.emit(Op::EffectCall {
2251            kind_idx: map_kind,
2252            op_idx: entries_op,
2253            arity: 1,
2254            node_id_idx,
2255        });
2256        let xs = self.alloc_local("__mf_xs");
2257        self.emit(Op::StoreLocal(xs));
2258
2259        // acc := init
2260        self.compile_expr(&args[1], false);
2261        let acc = self.alloc_local("__mf_acc");
2262        self.emit(Op::StoreLocal(acc));
2263
2264        // f := <closure>
2265        self.compile_expr(&args[2], false);
2266        let f = self.alloc_local("__mf_f");
2267        self.emit(Op::StoreLocal(f));
2268
2269        // i := 0
2270        let zero = self.pool.int(0);
2271        self.emit(Op::PushConst(zero));
2272        let i = self.alloc_local("__mf_i");
2273        self.emit(Op::StoreLocal(i));
2274
2275        // loop_top: while i < len(xs)
2276        let loop_top = self.code.len();
2277        self.emit(Op::LoadLocal(i));
2278        self.emit(Op::LoadLocal(xs));
2279        self.emit(Op::GetListLen);
2280        self.emit(Op::IntLt);
2281        let j_exit = self.code.len();
2282        self.emit(Op::JumpIfNot(0));
2283
2284        // pair := xs[i]
2285        self.emit(Op::LoadLocal(xs));
2286        self.emit(Op::LoadLocal(i));
2287        self.emit(Op::GetListElemDyn);
2288        let pair = self.alloc_local("__mf_pair");
2289        self.emit(Op::StoreLocal(pair));
2290
2291        // acc := f(acc, pair.0, pair.1)
2292        let nid = self.pool.node_id("n_map_fold");
2293        self.emit(Op::LoadLocal(f));
2294        self.emit(Op::LoadLocal(acc));
2295        self.emit(Op::LoadLocal(pair));
2296        self.emit(Op::GetElem(0));
2297        self.emit(Op::LoadLocal(pair));
2298        self.emit(Op::GetElem(1));
2299        self.emit(Op::CallClosure { arity: 3, node_id_idx: nid });
2300        self.emit(Op::StoreLocal(acc));
2301
2302        // i := i + 1
2303        self.emit(Op::LoadLocal(i));
2304        let one = self.pool.int(1);
2305        self.emit(Op::PushConst(one));
2306        self.emit(Op::IntAdd);
2307        self.emit(Op::StoreLocal(i));
2308
2309        let jump_back = self.code.len();
2310        let back = (loop_top as i32) - (jump_back as i32 + 1);
2311        self.emit(Op::Jump(back));
2312
2313        let exit_target = self.code.len() as i32;
2314        if let Op::JumpIfNot(off) = &mut self.code[j_exit] {
2315            *off = exit_target - (j_exit as i32 + 1);
2316        }
2317        self.emit(Op::LoadLocal(acc));
2318    }
2319
2320    /// Inline pattern: `<module>.map(v, f)` and friends.
2321    /// `wrap_with`: variant tag whose payload triggers the call (Ok / Some / Err).
2322    /// `wrap_result`: if true, wrap the closure's result back in `wrap_with`
2323    /// (map shape); if false, expect the closure to return a wrapped value
2324    /// itself (and_then shape).
2325    fn emit_variant_map(
2326        &mut self,
2327        args: &[a::CExpr],
2328        wrap_with: &str,
2329        wrap_result: bool,
2330    ) {
2331        // args[0] = the wrapped value (Result/Option), args[1] = closure
2332        let wrap_idx = self.pool.variant(wrap_with);
2333
2334        // Compile and store the value into a local, evaluate closure on top of stack.
2335        self.compile_expr(&args[0], false);
2336        let val_slot = self.alloc_local("__hov");
2337        self.emit(Op::StoreLocal(val_slot));
2338
2339        self.compile_expr(&args[1], false);
2340        let f_slot = self.alloc_local("__hof");
2341        self.emit(Op::StoreLocal(f_slot));
2342
2343        // Stack discipline:
2344        //   load val ⇒ [v]
2345        //   dup     ⇒ [v, v]
2346        //   test    ⇒ [v, Bool]
2347        //   jumpifnot ⇒ [v]
2348        // Both branches end with [v] before the branch body.
2349        self.emit(Op::LoadLocal(val_slot));
2350        self.emit(Op::Dup);
2351        self.emit(Op::TestVariant(wrap_idx));
2352        let j_skip = self.code.len();
2353        self.emit(Op::JumpIfNot(0));
2354
2355        // Matched arm: extract payload, call closure on it.
2356        self.emit(Op::GetVariantArg(0));
2357        let arg_slot = self.alloc_local("__hov_arg");
2358        self.emit(Op::StoreLocal(arg_slot));
2359        self.emit(Op::LoadLocal(f_slot));
2360        self.emit(Op::LoadLocal(arg_slot));
2361        let nid = self.pool.node_id("n_hov");
2362        self.emit(Op::CallClosure { arity: 1, node_id_idx: nid });
2363        if wrap_result {
2364            self.emit(Op::MakeVariant { name_idx: wrap_idx, arity: 1 });
2365        }
2366        let j_end = self.code.len();
2367        self.emit(Op::Jump(0));
2368
2369        // Skip arm: stack already has [v] from the failed Dup; nothing to do.
2370        let skip_target = self.code.len() as i32;
2371        if let Op::JumpIfNot(off) = &mut self.code[j_skip] {
2372            *off = skip_target - (j_skip as i32 + 1);
2373        }
2374
2375        let end_target = self.code.len() as i32;
2376        if let Op::Jump(off) = &mut self.code[j_end] {
2377            *off = end_target - (j_end as i32 + 1);
2378        }
2379    }
2380
2381    /// Sibling of `emit_variant_map` for the recovery combinators
2382    /// `result.or_else` and `option.or_else`. Differences from
2383    /// `emit_variant_map`:
2384    ///   - matches on the *negative* variant (`Err` / `None`)
2385    ///   - the closure's result becomes the call's result directly,
2386    ///     with no wrapping (it is itself a `Result` / `Option`)
2387    ///   - `option.or_else`'s closure takes zero args (`None` has no
2388    ///     payload to forward)
2389    fn emit_variant_or_else(
2390        &mut self,
2391        args: &[a::CExpr],
2392        match_on: &str,
2393        closure_arity: u16,
2394    ) {
2395        let match_idx = self.pool.variant(match_on);
2396
2397        self.compile_expr(&args[0], false);
2398        let val_slot = self.alloc_local("__hoe");
2399        self.emit(Op::StoreLocal(val_slot));
2400
2401        self.compile_expr(&args[1], false);
2402        let f_slot = self.alloc_local("__hoe_f");
2403        self.emit(Op::StoreLocal(f_slot));
2404
2405        // Stack discipline mirrors emit_variant_map:
2406        //   load val      ⇒ [v]
2407        //   dup           ⇒ [v, v]
2408        //   test          ⇒ [v, Bool]
2409        //   jumpifnot     ⇒ [v]
2410        // The unmatched arm leaves [v] (Ok/Some unchanged); the
2411        // matched arm pops [v] and pushes the closure's result.
2412        self.emit(Op::LoadLocal(val_slot));
2413        self.emit(Op::Dup);
2414        self.emit(Op::TestVariant(match_idx));
2415        let j_skip = self.code.len();
2416        self.emit(Op::JumpIfNot(0));
2417
2418        // Matched arm: pop the duplicate left on the stack,
2419        // then call the closure with whatever payload it expects.
2420        self.emit(Op::Pop);
2421        self.emit(Op::LoadLocal(f_slot));
2422        if closure_arity == 1 {
2423            self.emit(Op::LoadLocal(val_slot));
2424            self.emit(Op::GetVariantArg(0));
2425        }
2426        let nid = self.pool.node_id("n_hoe");
2427        self.emit(Op::CallClosure { arity: closure_arity, node_id_idx: nid });
2428
2429        let j_end = self.code.len();
2430        self.emit(Op::Jump(0));
2431
2432        // Unmatched arm: stack already holds [v]; nothing to do.
2433        let skip_target = self.code.len() as i32;
2434        if let Op::JumpIfNot(off) = &mut self.code[j_skip] {
2435            *off = skip_target - (j_skip as i32 + 1);
2436        }
2437
2438        let end_target = self.code.len() as i32;
2439        if let Op::Jump(off) = &mut self.code[j_end] {
2440            *off = end_target - (j_end as i32 + 1);
2441        }
2442    }
2443
2444    /// `option.unwrap_or_else(opt, f)` — lazy default via zero-arg thunk.
2445    ///   Some(x) → x          (unwrap; no wrapping)
2446    ///   None    → f()        (call thunk; return its result directly)
2447    fn emit_option_unwrap_or_else(&mut self, args: &[a::CExpr]) {
2448        let some_idx = self.pool.variant("Some");
2449
2450        // Compile opt and f; stash both so they're accessible on both arms.
2451        self.compile_expr(&args[0], false);
2452        let val_slot = self.alloc_local("__uoe_val");
2453        self.emit(Op::StoreLocal(val_slot));
2454
2455        self.compile_expr(&args[1], false);
2456        let f_slot = self.alloc_local("__uoe_f");
2457        self.emit(Op::StoreLocal(f_slot));
2458
2459        // Test whether opt is Some.
2460        //   load val ⇒ [v]
2461        //   dup      ⇒ [v, v]
2462        //   test     ⇒ [v, Bool]
2463        //   jumpifnot → None arm
2464        self.emit(Op::LoadLocal(val_slot));
2465        self.emit(Op::Dup);
2466        self.emit(Op::TestVariant(some_idx));
2467        let j_none = self.code.len();
2468        self.emit(Op::JumpIfNot(0));
2469
2470        // Some arm: extract the payload from [v] left on the stack.
2471        self.emit(Op::GetVariantArg(0));
2472        let j_end = self.code.len();
2473        self.emit(Op::Jump(0));
2474
2475        // None arm: pop the [v] duplicate, call the thunk.
2476        let none_target = self.code.len() as i32;
2477        if let Op::JumpIfNot(off) = &mut self.code[j_none] {
2478            *off = none_target - (j_none as i32 + 1);
2479        }
2480        self.emit(Op::Pop);
2481        self.emit(Op::LoadLocal(f_slot));
2482        let nid = self.pool.node_id("n_uoe");
2483        self.emit(Op::CallClosure { arity: 0, node_id_idx: nid });
2484
2485        // Patch jump-to-end from Some arm.
2486        let end_target = self.code.len() as i32;
2487        if let Op::Jump(off) = &mut self.code[j_end] {
2488            *off = end_target - (j_end as i32 + 1);
2489        }
2490    }
2491
2492    // ---- std.flow trampolines ----------------------------------------
2493    //
2494    // Each flow.<op>(c1, c2, ...) call site:
2495    //   1. compiles its closure args and leaves them on the stack
2496    //   2. registers a fresh "trampoline" Function whose body invokes
2497    //      those captured closures appropriately
2498    //   3. emits MakeClosure { fn_id: trampoline, capture_count: N }
2499    //
2500    // The trampoline's parameter layout is [capture_0, ..., capture_{N-1},
2501    // arg_0, ...]: captures first, the closure's own args after.
2502
2503    /// Allocate a fresh fn_id for a trampoline and install its bytecode.
2504    /// Trampolines are the one Function-creation path that already has
2505    /// the body in hand at install time (top-level fns and lambdas have
2506    /// it filled in later), so we compute `body_hash` immediately. The
2507    /// final hash pass at the end of `compile_program` is a no-op here.
2508    fn install_trampoline(&mut self, name: &str, arity: u16, locals_count: u16, code: Vec<Op>) -> u32 {
2509        let fn_id = self.next_fn_id.len() as u32;
2510        let body_hash = crate::program::compute_body_hash(
2511            arity, locals_count, &code, &self.pool.record_shapes);
2512        self.next_fn_id.push(Function {
2513            name: name.into(),
2514            arity,
2515            locals_count,
2516            code,
2517            effects: Vec::new(),
2518            body_hash,
2519            // Trampolines (flow.sequential / parallel / etc.) don't
2520            // surface refined params at this layer.
2521            refinements: Vec::new(),
2522            // Trampolines never emit `Op::GetField` — they're pure
2523            // scaffolding. Leaving this at 0 means the VM allocates
2524            // an empty IC slot.
2525            field_ic_sites: 0,
2526        });
2527        fn_id
2528    }
2529
2530    /// `flow.sequential(f, g)` returns a closure `(x) -> g(f(x))`.
2531    fn emit_flow_sequential(&mut self, args: &[a::CExpr]) {
2532        // Push f, g; build the trampoline closure with 2 captures.
2533        self.compile_expr(&args[0], false);
2534        self.compile_expr(&args[1], false);
2535        let nid = self.pool.node_id("n_flow_sequential");
2536        let code = vec![
2537            // Locals: [f=0, g=1, x=2]
2538            Op::LoadLocal(0),                                  // push f
2539            Op::LoadLocal(2),                                  // push x
2540            Op::CallClosure { arity: 1, node_id_idx: nid },    // r = f(x)
2541            // stack: [r]
2542            Op::StoreLocal(3),                                 // tmp = r
2543            Op::LoadLocal(1),                                  // push g
2544            Op::LoadLocal(3),                                  // push tmp
2545            Op::CallClosure { arity: 1, node_id_idx: nid },    // r = g(tmp)
2546            Op::Return,
2547        ];
2548        let fn_id = self.install_trampoline("__flow_sequential", 3, 4, code);
2549        self.emit(Op::MakeClosure { fn_id, capture_count: 2 });
2550    }
2551
2552    /// `flow.parallel(fa, fb)` returns a closure `() -> (fa(), fb())`.
2553    /// Implementation is sequential: each function is called in order
2554    /// and the results are packed into a 2-tuple. The spec (§11.2)
2555    /// allows the runtime to apply true parallelism here; that needs
2556    /// a thread-safe handler split and is left to a follow-up. The
2557    /// signature is what users program against — sequential vs threaded
2558    /// is an implementation detail invisible to the type system.
2559    fn emit_flow_parallel(&mut self, args: &[a::CExpr]) {
2560        // Push fa, fb; build a 0-arg trampoline closure with 2 captures.
2561        self.compile_expr(&args[0], false);
2562        self.compile_expr(&args[1], false);
2563        let nid = self.pool.node_id("n_flow_parallel");
2564        let code = vec![
2565            // Locals: [fa=0, fb=1]
2566            Op::LoadLocal(0),                                  // push fa
2567            Op::CallClosure { arity: 0, node_id_idx: nid },    // a = fa()
2568            Op::LoadLocal(1),                                  // push fb
2569            Op::CallClosure { arity: 0, node_id_idx: nid },    // b = fb()
2570            Op::MakeTuple(2),                                  // (a, b)
2571            Op::Return,
2572        ];
2573        let fn_id = self.install_trampoline("__flow_parallel", 2, 2, code);
2574        self.emit(Op::MakeClosure { fn_id, capture_count: 2 });
2575    }
2576
2577    /// `flow.parallel_list(actions)` runs each 0-arg closure in `actions`
2578    /// and returns the results as a list in input order. Variadic
2579    /// counterpart to `flow.parallel`. Sequential under the hood — the
2580    /// spec (§11.2) reserves true threading for a future scheduler.
2581    /// Compiled inline (mirrors `list.map`) so closure args can flow
2582    /// through `CallClosure` without a heap-allocated trampoline.
2583    fn emit_flow_parallel_list(&mut self, args: &[a::CExpr]) {
2584        // xs := actions
2585        self.compile_expr(&args[0], false);
2586        let xs = self.alloc_local("__fpl_xs");
2587        self.emit(Op::StoreLocal(xs));
2588
2589        // out := []
2590        self.emit(Op::MakeList(0));
2591        let out = self.alloc_local("__fpl_out");
2592        self.emit(Op::StoreLocal(out));
2593
2594        // i := 0
2595        let zero = self.pool.int(0);
2596        self.emit(Op::PushConst(zero));
2597        let i = self.alloc_local("__fpl_i");
2598        self.emit(Op::StoreLocal(i));
2599
2600        // loop_top: while i < len(xs) { ... }
2601        let loop_top = self.code.len();
2602        self.emit(Op::LoadLocal(i));
2603        self.emit(Op::LoadLocal(xs));
2604        self.emit(Op::GetListLen);
2605        self.emit(Op::IntLt);
2606        let j_exit = self.code.len();
2607        self.emit(Op::JumpIfNot(0));
2608
2609        // body: out := out ++ [xs[i]()]
2610        let nid = self.pool.node_id("n_flow_parallel_list");
2611        self.emit(Op::LoadLocal(out));
2612        self.emit(Op::LoadLocal(xs));
2613        self.emit(Op::LoadLocal(i));
2614        self.emit(Op::GetListElemDyn);
2615        self.emit(Op::CallClosure { arity: 0, node_id_idx: nid });
2616        self.emit(Op::ListAppend);
2617        self.emit(Op::StoreLocal(out));
2618
2619        // i := i + 1
2620        self.emit(Op::LoadLocal(i));
2621        let one = self.pool.int(1);
2622        self.emit(Op::PushConst(one));
2623        self.emit(Op::IntAdd);
2624        self.emit(Op::StoreLocal(i));
2625
2626        // jump back
2627        let jump_back = self.code.len();
2628        let back = (loop_top as i32) - (jump_back as i32 + 1);
2629        self.emit(Op::Jump(back));
2630
2631        // exit: patch j_exit, push out
2632        let exit_target = self.code.len() as i32;
2633        if let Op::JumpIfNot(off) = &mut self.code[j_exit] {
2634            *off = exit_target - (j_exit as i32 + 1);
2635        }
2636        self.emit(Op::LoadLocal(out));
2637    }
2638
2639    /// `flow.branch(cond, t, f)` returns a closure `(x) -> if cond(x) then t(x) else f(x)`.
2640    fn emit_flow_branch(&mut self, args: &[a::CExpr]) {
2641        self.compile_expr(&args[0], false);
2642        self.compile_expr(&args[1], false);
2643        self.compile_expr(&args[2], false);
2644        let nid = self.pool.node_id("n_flow_branch");
2645        let mut code = vec![
2646            // Locals: [cond=0, t=1, f=2, x=3]
2647            Op::LoadLocal(0),                               // push cond
2648            Op::LoadLocal(3),                               // push x
2649            Op::CallClosure { arity: 1, node_id_idx: nid }, // bool
2650        ];
2651        let j_false = code.len();
2652        code.push(Op::JumpIfNot(0));                        // patched
2653        // true arm: t(x)
2654        code.push(Op::LoadLocal(1));
2655        code.push(Op::LoadLocal(3));
2656        code.push(Op::CallClosure { arity: 1, node_id_idx: nid });
2657        code.push(Op::Return);
2658        // false arm
2659        let false_target = code.len() as i32;
2660        if let Op::JumpIfNot(off) = &mut code[j_false] {
2661            *off = false_target - (j_false as i32 + 1);
2662        }
2663        code.push(Op::LoadLocal(2));
2664        code.push(Op::LoadLocal(3));
2665        code.push(Op::CallClosure { arity: 1, node_id_idx: nid });
2666        code.push(Op::Return);
2667
2668        let fn_id = self.install_trampoline("__flow_branch", 4, 4, code);
2669        self.emit(Op::MakeClosure { fn_id, capture_count: 3 });
2670    }
2671
2672    /// `flow.retry(f, max_attempts)` returns a closure `(x) -> Result[U, E]`
2673    /// that calls `f(x)` up to `max_attempts` times, returning the first
2674    /// `Ok` or the final `Err`.
2675    fn emit_flow_retry(&mut self, args: &[a::CExpr]) {
2676        self.compile_expr(&args[0], false);
2677        self.compile_expr(&args[1], false);
2678        let call_nid = self.pool.node_id("n_flow_retry");
2679        let ok_idx = self.pool.variant("Ok");
2680        let zero_const = self.pool.int(0);
2681        let one_const = self.pool.int(1);
2682        // Locals: [f=0, max=1, x=2, i=3, last=4]
2683        let mut code = vec![
2684            // i := 0
2685            Op::PushConst(zero_const),
2686            Op::StoreLocal(3),
2687        ];
2688        // loop_top: while i < max
2689        let loop_top = code.len() as i32;
2690        code.push(Op::LoadLocal(3));
2691        code.push(Op::LoadLocal(1));
2692        code.push(Op::IntLt);
2693        let j_done = code.len();
2694        code.push(Op::JumpIfNot(0));                       // patched
2695
2696        // body: r := f(x); last := r
2697        code.push(Op::LoadLocal(0));
2698        code.push(Op::LoadLocal(2));
2699        code.push(Op::CallClosure { arity: 1, node_id_idx: call_nid });
2700        code.push(Op::StoreLocal(4));
2701
2702        // Test variant Ok on last; if so, return last.
2703        code.push(Op::LoadLocal(4));
2704        code.push(Op::TestVariant(ok_idx));
2705        let j_was_err = code.len();
2706        code.push(Op::JumpIfNot(0));                       // patched: skip return
2707        code.push(Op::LoadLocal(4));
2708        code.push(Op::Return);
2709
2710        // was_err: i := i + 1; jump loop_top
2711        let was_err_target = code.len() as i32;
2712        if let Op::JumpIfNot(off) = &mut code[j_was_err] {
2713            *off = was_err_target - (j_was_err as i32 + 1);
2714        }
2715        code.push(Op::LoadLocal(3));
2716        code.push(Op::PushConst(one_const));
2717        code.push(Op::IntAdd);
2718        code.push(Op::StoreLocal(3));
2719        let pc_after_jump = code.len() as i32 + 1;
2720        code.push(Op::Jump(loop_top - pc_after_jump));
2721
2722        // done: return last (the final Err, or Unit if max=0).
2723        let done_target = code.len() as i32;
2724        if let Op::JumpIfNot(off) = &mut code[j_done] {
2725            *off = done_target - (j_done as i32 + 1);
2726        }
2727        code.push(Op::LoadLocal(4));
2728        code.push(Op::Return);
2729
2730        let fn_id = self.install_trampoline("__flow_retry", 3, 5, code);
2731        self.emit(Op::MakeClosure { fn_id, capture_count: 2 });
2732    }
2733
2734    /// `flow.retry_with_backoff(f, attempts, base_ms)` (#226). Variant
2735    /// of `flow.retry` that sleeps between attempts. The first
2736    /// attempt fires immediately; attempt k > 1 waits `base_ms *
2737    /// 2^(k-2)` ms before retrying. Sleeps go through
2738    /// `time.sleep_ms`, which is why the resulting closure carries
2739    /// `[time]` in its effect row even though the underlying `f` is
2740    /// pure.
2741    fn emit_flow_retry_with_backoff(&mut self, args: &[a::CExpr]) {
2742        // Push captures: f, max, base_ms. The trampoline takes one
2743        // call-time arg `x`, so capture_count = 3, arity = 4.
2744        self.compile_expr(&args[0], false);
2745        self.compile_expr(&args[1], false);
2746        self.compile_expr(&args[2], false);
2747        let call_nid    = self.pool.node_id("n_flow_retry_backoff");
2748        let sleep_nid   = self.pool.node_id("n_flow_retry_backoff_sleep");
2749        let kind_idx    = self.pool.str("time");
2750        let op_idx      = self.pool.str("sleep_ms");
2751        let ok_idx      = self.pool.variant("Ok");
2752        let zero_const  = self.pool.int(0);
2753        let one_const   = self.pool.int(1);
2754        let two_const   = self.pool.int(2);
2755        // Locals layout:
2756        //   0=f, 1=max, 2=base_ms (captures)
2757        //   3=x (arg)
2758        //   4=i, 5=last, 6=next_delay (working state)
2759        let mut code = vec![
2760            // next_delay := base_ms
2761            Op::LoadLocal(2),
2762            Op::StoreLocal(6),
2763            // i := 0
2764            Op::PushConst(zero_const),
2765            Op::StoreLocal(4),
2766        ];
2767
2768        let loop_top = code.len() as i32;
2769        // while i < max
2770        code.push(Op::LoadLocal(4));
2771        code.push(Op::LoadLocal(1));
2772        code.push(Op::IntLt);
2773        let j_done = code.len();
2774        code.push(Op::JumpIfNot(0)); // patched
2775
2776        // if i > 0: time.sleep_ms(next_delay); next_delay := next_delay * 2
2777        code.push(Op::PushConst(zero_const));
2778        code.push(Op::LoadLocal(4));
2779        code.push(Op::IntLt);                // 0 < i ?
2780        let j_no_sleep = code.len();
2781        code.push(Op::JumpIfNot(0));         // patched: skip the sleep block
2782        // Sleep
2783        code.push(Op::LoadLocal(6));         // arg = next_delay
2784        code.push(Op::EffectCall {
2785            kind_idx, op_idx, arity: 1, node_id_idx: sleep_nid,
2786        });
2787        code.push(Op::Pop);                  // discard the Unit result
2788        // next_delay := next_delay * 2
2789        code.push(Op::LoadLocal(6));
2790        code.push(Op::PushConst(two_const));
2791        code.push(Op::NumMul);
2792        code.push(Op::StoreLocal(6));
2793        // patch the no-sleep skip
2794        let after_sleep = code.len() as i32;
2795        if let Op::JumpIfNot(off) = &mut code[j_no_sleep] {
2796            *off = after_sleep - (j_no_sleep as i32 + 1);
2797        }
2798
2799        // last := f(x)
2800        code.push(Op::LoadLocal(0));
2801        code.push(Op::LoadLocal(3));
2802        code.push(Op::CallClosure { arity: 1, node_id_idx: call_nid });
2803        code.push(Op::StoreLocal(5));
2804
2805        // if Ok(last): return last
2806        code.push(Op::LoadLocal(5));
2807        code.push(Op::TestVariant(ok_idx));
2808        let j_was_err = code.len();
2809        code.push(Op::JumpIfNot(0)); // patched
2810        code.push(Op::LoadLocal(5));
2811        code.push(Op::Return);
2812
2813        // was_err: i := i + 1; jump loop_top
2814        let was_err_target = code.len() as i32;
2815        if let Op::JumpIfNot(off) = &mut code[j_was_err] {
2816            *off = was_err_target - (j_was_err as i32 + 1);
2817        }
2818        code.push(Op::LoadLocal(4));
2819        code.push(Op::PushConst(one_const));
2820        code.push(Op::IntAdd);
2821        code.push(Op::StoreLocal(4));
2822        let pc_after_jump = code.len() as i32 + 1;
2823        code.push(Op::Jump(loop_top - pc_after_jump));
2824
2825        // done: return last (the final Err, or Unit if max=0).
2826        let done_target = code.len() as i32;
2827        if let Op::JumpIfNot(off) = &mut code[j_done] {
2828            *off = done_target - (j_done as i32 + 1);
2829        }
2830        code.push(Op::LoadLocal(5));
2831        code.push(Op::Return);
2832
2833        let fn_id = self.install_trampoline("__flow_retry_backoff", 4, 7, code);
2834        self.emit(Op::MakeClosure { fn_id, capture_count: 3 });
2835    }
2836}
2837
2838/// Collect free variables referenced in `e` that are not in `bound`.
2839/// Mutates `bound` to track let/lambda introductions during the walk;
2840/// the caller's set is preserved on return because Rust's borrow rules
2841/// force us to clone for sub-scopes that rebind a name.
2842fn free_vars(e: &a::CExpr, bound: &mut std::collections::HashSet<String>, out: &mut Vec<String>) {
2843    match e {
2844        a::CExpr::Literal { .. } => {}
2845        a::CExpr::Var { name } => {
2846            if !bound.contains(name) && !out.contains(name) {
2847                out.push(name.clone());
2848            }
2849        }
2850        a::CExpr::Call { callee, args } => {
2851            free_vars(callee, bound, out);
2852            for a in args { free_vars(a, bound, out); }
2853        }
2854        a::CExpr::Let { name, value, body, .. } => {
2855            free_vars(value, bound, out);
2856            let was_bound = bound.contains(name);
2857            bound.insert(name.clone());
2858            free_vars(body, bound, out);
2859            if !was_bound { bound.remove(name); }
2860        }
2861        a::CExpr::Match { scrutinee, arms } => {
2862            free_vars(scrutinee, bound, out);
2863            for arm in arms {
2864                let mut local_bound = bound.clone();
2865                pattern_binders(&arm.pattern, &mut local_bound);
2866                free_vars(&arm.body, &mut local_bound, out);
2867            }
2868        }
2869        a::CExpr::Block { statements, result } => {
2870            let mut local_bound = bound.clone();
2871            for s in statements { free_vars(s, &mut local_bound, out); }
2872            free_vars(result, &mut local_bound, out);
2873        }
2874        a::CExpr::Constructor { args, .. } => {
2875            for a in args { free_vars(a, bound, out); }
2876        }
2877        a::CExpr::RecordLit { fields } => {
2878            for f in fields { free_vars(&f.value, bound, out); }
2879        }
2880        a::CExpr::TupleLit { items } | a::CExpr::ListLit { items } => {
2881            for it in items { free_vars(it, bound, out); }
2882        }
2883        a::CExpr::FieldAccess { value, .. } => free_vars(value, bound, out),
2884        a::CExpr::Lambda { params, body, .. } => {
2885            let mut inner = bound.clone();
2886            for p in params { inner.insert(p.name.clone()); }
2887            free_vars(body, &mut inner, out);
2888        }
2889        a::CExpr::BinOp { lhs, rhs, .. } => {
2890            free_vars(lhs, bound, out);
2891            free_vars(rhs, bound, out);
2892        }
2893        a::CExpr::UnaryOp { expr, .. } => free_vars(expr, bound, out),
2894        a::CExpr::Return { value } => free_vars(value, bound, out),
2895    }
2896}
2897
2898fn pattern_binders(p: &a::Pattern, bound: &mut std::collections::HashSet<String>) {
2899    match p {
2900        a::Pattern::PWild | a::Pattern::PLiteral { .. } => {}
2901        a::Pattern::PVar { name } => { bound.insert(name.clone()); }
2902        a::Pattern::PConstructor { args, .. } => {
2903            for a in args { pattern_binders(a, bound); }
2904        }
2905        a::Pattern::PRecord { fields } => {
2906            for f in fields { pattern_binders(&f.pattern, bound); }
2907        }
2908        a::Pattern::PTuple { items } => {
2909            for it in items { pattern_binders(it, bound); }
2910        }
2911    }
2912}