Skip to main content

lex_bytecode/
compiler.rs

1//! M4 compiler: canonical AST → bytecode.
2
3use crate::op::*;
4use crate::program::*;
5use indexmap::IndexMap;
6use lex_ast as a;
7
8#[derive(Default)]
9struct ConstPool {
10    pool: Vec<Const>,
11    fields: IndexMap<String, u32>,
12    variants: IndexMap<String, u32>,
13    node_ids: IndexMap<String, u32>,
14    ints: IndexMap<i64, u32>,
15    bools: IndexMap<u8, u32>,
16    strs: IndexMap<String, u32>,
17    /// Interned record field-name shapes (#461). Deduplicated by content
18    /// so a record literal with the same field-name layout reuses the
19    /// same `shape_idx` across the whole program — keeps the side-table
20    /// small even when the same struct is constructed in many places.
21    record_shapes: Vec<Vec<u32>>,
22    record_shape_dedup: IndexMap<Vec<u32>, u32>,
23}
24
25impl ConstPool {
26    fn field(&mut self, name: &str) -> u32 {
27        if let Some(i) = self.fields.get(name) { return *i; }
28        let i = self.pool.len() as u32;
29        self.pool.push(Const::FieldName(name.into()));
30        self.fields.insert(name.into(), i);
31        i
32    }
33    fn variant(&mut self, name: &str) -> u32 {
34        if let Some(i) = self.variants.get(name) { return *i; }
35        let i = self.pool.len() as u32;
36        self.pool.push(Const::VariantName(name.into()));
37        self.variants.insert(name.into(), i);
38        i
39    }
40    fn node_id(&mut self, name: &str) -> u32 {
41        if let Some(i) = self.node_ids.get(name) { return *i; }
42        let i = self.pool.len() as u32;
43        self.pool.push(Const::NodeId(name.into()));
44        self.node_ids.insert(name.into(), i);
45        i
46    }
47    fn int(&mut self, n: i64) -> u32 {
48        if let Some(i) = self.ints.get(&n) { return *i; }
49        let i = self.pool.len() as u32;
50        self.pool.push(Const::Int(n));
51        self.ints.insert(n, i);
52        i
53    }
54    fn bool(&mut self, b: bool) -> u32 {
55        let key = b as u8;
56        if let Some(i) = self.bools.get(&key) { return *i; }
57        let i = self.pool.len() as u32;
58        self.pool.push(Const::Bool(b));
59        self.bools.insert(key, i);
60        i
61    }
62    fn str(&mut self, s: &str) -> u32 {
63        if let Some(i) = self.strs.get(s) { return *i; }
64        let i = self.pool.len() as u32;
65        self.pool.push(Const::Str(s.into()));
66        self.strs.insert(s.into(), i);
67        i
68    }
69    fn float(&mut self, f: f64) -> u32 {
70        // Floats: not deduped (NaN issues).
71        let i = self.pool.len() as u32;
72        self.pool.push(Const::Float(f));
73        i
74    }
75    fn unit(&mut self) -> u32 {
76        let i = self.pool.len() as u32;
77        self.pool.push(Const::Unit);
78        i
79    }
80
81    /// Intern a record field-name shape (#461). Returns the index into
82    /// `record_shapes`; identical shapes (same field-name-index vec)
83    /// always return the same index.
84    fn record_shape(&mut self, idxs: Vec<u32>) -> u32 {
85        if let Some(i) = self.record_shape_dedup.get(&idxs) {
86            return *i;
87        }
88        let i = self.record_shapes.len() as u32;
89        self.record_shape_dedup.insert(idxs.clone(), i);
90        self.record_shapes.push(idxs);
91        i
92    }
93}
94
95pub fn compile_program(stages: &[a::Stage]) -> Program {
96    let mut p = Program {
97        constants: Vec::new(),
98        functions: Vec::new(),
99        function_names: IndexMap::new(),
100        module_aliases: IndexMap::new(),
101        entry: None,
102        record_shapes: Vec::new(),
103    };
104
105    // Collect imports as alias → module-name. The module name is the part
106    // after `std.` (so `import "std.io" as io` ⇒ alias `io` → module `io`).
107    for s in stages {
108        if let a::Stage::Import(i) = s {
109            let module = i.reference.strip_prefix("std.").unwrap_or(&i.reference).to_string();
110            p.module_aliases.insert(i.alias.clone(), module);
111        }
112    }
113
114    for s in stages {
115        if let a::Stage::FnDecl(fd) = s {
116            let idx = p.functions.len() as u32;
117            p.function_names.insert(fd.name.clone(), idx);
118            p.functions.push(Function {
119                name: fd.name.clone(),
120                arity: fd.params.len() as u16,
121                locals_count: 0,
122                code: Vec::new(),
123                effects: fd.effects.iter().map(|e| DeclaredEffect {
124                    kind: e.name.clone(),
125                    arg: e.arg.as_ref().map(|a| match a {
126                        a::EffectArg::Str { value } => EffectArg::Str(value.clone()),
127                        a::EffectArg::Int { value } => EffectArg::Int(*value),
128                        a::EffectArg::Ident { value } => EffectArg::Ident(value.clone()),
129                    }),
130                }).collect(),
131                // Filled in at the end of the compile pass, once `code`
132                // and `locals_count` are final. See #222.
133                body_hash: crate::program::ZERO_BODY_HASH,
134                // Per-param refinement predicates for runtime check
135                // (#209 slice 3). Lifted directly from each param's
136                // `TypeExpr::Refined` if present; `None` otherwise.
137                refinements: fd.params.iter().map(|p| match &p.ty {
138                    a::TypeExpr::Refined { binding, predicate, .. } =>
139                        Some(crate::program::Refinement {
140                            binding: binding.clone(),
141                            predicate: (**predicate).clone(),
142                        }),
143                    _ => None,
144                }).collect(),
145                // Filled in below once the FnCompiler counts emit sites.
146                field_ic_sites: 0,
147            });
148        }
149    }
150
151    let mut pool = ConstPool::default();
152    let function_names = p.function_names.clone();
153    let module_aliases = p.module_aliases.clone();
154    let mut pending_lambdas: Vec<PendingLambda> = Vec::new();
155    // #461 slice 7: collect `type Foo = { ... }` aliases so
156    // `record_field_types` can resolve a parameter's named record
157    // type to its field layout. Without this, `r :: R` where
158    // `type R = { x :: Int, y :: Int }` falls through to Unknown
159    // and the typed-Add lowering misses on `r.x + r.y`.
160    let mut type_aliases: IndexMap<String, a::TypeExpr> = IndexMap::new();
161    for s in stages {
162        if let a::Stage::TypeDecl(td) = s {
163            // Parameterized type aliases (`type Box[T] = ...`) are
164            // out of scope for this slice — without monomorphization
165            // we can't know what T resolves to. Skip them.
166            if td.params.is_empty() {
167                type_aliases.insert(td.name.clone(), td.definition.clone());
168            }
169        }
170    }
171
172    for s in stages {
173        if let a::Stage::FnDecl(_) = s {
174            // Build a NodeId map for *this* stage so the compiler can stamp
175            // each Call/EffectCall opcode with the originating AST node.
176            let id_map = lex_ast::expr_ids(s);
177            let fd = match s { a::Stage::FnDecl(fd) => fd, _ => unreachable!() };
178            let mut fc = FnCompiler {
179                code: Vec::new(),
180                locals: IndexMap::new(),
181                next_local: 0,
182                peak_local: 0,
183                local_types: IndexMap::new(),
184                local_record_field_types: IndexMap::new(),
185                field_get_sites: 0,
186                pool: &mut pool,
187                function_names: &function_names,
188                module_aliases: &module_aliases,
189                id_map: &id_map,
190                pending_lambdas: &mut pending_lambdas,
191                next_fn_id: &mut p.functions,
192            };
193            for param in &fd.params {
194                let i = fc.next_local;
195                fc.locals.insert(param.name.clone(), i);
196                fc.local_types.insert(param.name.clone(), classify_type_expr(&param.ty));
197                // #461 slice 7: inline-record parameter (`r ::
198                // { x :: Int, y :: Int }`) — populate the per-local
199                // field-type map so `r.x + r.y` classifies as
200                // Int+Int → IntAdd, which slice 7 then fuses.
201                if let Some(ftypes) = record_field_types(&param.ty, &type_aliases) {
202                    fc.local_record_field_types.insert(param.name.clone(), ftypes);
203                }
204                fc.next_local += 1;
205                fc.peak_local = fc.next_local;
206            }
207            fc.compile_expr(&fd.body, true);
208            fc.code.push(Op::Return);
209            let code = std::mem::take(&mut fc.code);
210            let peak = fc.peak_local;
211            let field_sites = fc.field_get_sites as u16;
212            drop(fc);
213            let idx = function_names[&fd.name];
214            p.functions[idx as usize].code = code;
215            p.functions[idx as usize].field_ic_sites = field_sites;
216            p.functions[idx as usize].locals_count = peak;
217        }
218    }
219
220    // Compile pending lambdas in FIFO order. Each lambda may emit further
221    // lambdas; loop until the queue drains.
222    while let Some(pl) = pending_lambdas.pop() {
223        let id_map = std::collections::HashMap::new();
224        let mut fc = FnCompiler {
225            code: Vec::new(),
226            locals: IndexMap::new(),
227            next_local: 0,
228            peak_local: 0,
229            local_types: IndexMap::new(),
230            local_record_field_types: IndexMap::new(),
231            field_get_sites: 0,
232            pool: &mut pool,
233            function_names: &function_names,
234            module_aliases: &module_aliases,
235            id_map: &id_map,
236            pending_lambdas: &mut pending_lambdas,
237            next_fn_id: &mut p.functions,
238        };
239        for name in &pl.capture_names {
240            let i = fc.next_local;
241            fc.locals.insert(name.clone(), i);
242            // Captures' static types aren't known at this layer
243            // — the closure's environment carries them dynamically.
244            // Conservative fallback; binop lowering stays correct
245            // because Unknown classifies through to NumAdd.
246            fc.local_types.insert(name.clone(), NumTy::Unknown);
247            fc.next_local += 1;
248            fc.peak_local = fc.next_local;
249        }
250        for p in &pl.params {
251            let i = fc.next_local;
252            fc.locals.insert(p.name.clone(), i);
253            fc.local_types.insert(p.name.clone(), classify_type_expr(&p.ty));
254            fc.next_local += 1;
255            fc.peak_local = fc.next_local;
256        }
257        fc.compile_expr(&pl.body, true);
258        fc.code.push(Op::Return);
259        let code = std::mem::take(&mut fc.code);
260        let peak = fc.peak_local;
261        let field_sites = fc.field_get_sites as u16;
262        drop(fc);
263        p.functions[pl.fn_id as usize].code = code;
264        p.functions[pl.fn_id as usize].field_ic_sites = field_sites;
265        p.functions[pl.fn_id as usize].locals_count = peak;
266    }
267
268    // #464 step 2: escape-analysis-driven lowering. Rewrites
269    // `MakeRecord` at non-escaping sites to `AllocStackRecord`, which
270    // the VM allocates in the frame's stack-record arena instead of
271    // on the heap. Runs on raw bytecode (before the peephole passes)
272    // so the escape analysis — which itself walks raw bytecode — sees
273    // exactly the program it was designed for.
274    //
275    // The peephole passes that follow do not match on MakeRecord /
276    // AllocStackRecord, so swapping one for the other doesn't disturb
277    // any pattern. `compute_body_hash` lowers AllocStackRecord back
278    // to the legacy MakeRecord form (#222), so closure identity is
279    // invariant under this lowering.
280    //
281    // Escape hatch: `LEX_NO_STACK_RECORDS=1` skips the lowering
282    // entirely (#464 step 3). The flag exists so the bench can A/B
283    // the same source under matched VM/peephole conditions; in
284    // production code the pass always runs.
285    if std::env::var_os("LEX_NO_STACK_RECORDS").is_none() {
286        let escape_index = crate::escape::build_escape_index(&p.functions);
287        for f in p.functions.iter_mut() {
288            apply_escape_lowering(&mut f.code, &f.name, &escape_index);
289        }
290    }
291
292    // #463 slice 2b-i: arena-eligibility lowering. Runs **after**
293    // `apply_escape_lowering` and targets the remaining `MakeRecord`
294    // / `MakeTuple` sites — those the stack pass left alone because
295    // they cross the frame boundary, but the request-scope analysis
296    // proves they stay inside the active `EffectHandler` arena
297    // scope. The two passes form a three-tier allocation hierarchy:
298    //
299    //   frame-local        → AllocStackRecord  (#464, cheapest)
300    //   request-local      → AllocArenaRecord  (#463, this slice)
301    //   escapes request    → MakeRecord        (heap, status quo)
302    //
303    // Order matters: a site that fits the stack tier should land
304    // there (cheapest), so the stack pass runs first. The arena
305    // pass's match doesn't fire on AllocStackRecord, so already-
306    // stack-lowered sites stay stack-lowered. Sites that escape the
307    // frame and the request both pass through unchanged.
308    //
309    // Escape hatch: `LEX_NO_ARENA_RECORDS=1` skips the lowering,
310    // mirroring `LEX_NO_STACK_RECORDS`. The slice-2b-i bench uses
311    // this to A/B identical source under matched VM conditions.
312    //
313    // `body_hash` invariance: `compute_body_hash` decodes
314    // `AllocArenaRecord` / `AllocArenaTuple` back to their legacy
315    // `MakeRecord` / `MakeTuple` form, so closure identity (#222) is
316    // bit-identical across this and the stack lowering.
317    if std::env::var_os("LEX_NO_ARENA_RECORDS").is_none() {
318        let arena_index = crate::arena::build_arena_index(&p.functions);
319        for f in p.functions.iter_mut() {
320            apply_arena_lowering(&mut f.code, &f.name, &arena_index);
321        }
322    }
323
324    // Peephole pass (#461 superinstructions). Rewrites fusable opcode
325    // patterns into single dispatch steps. Runs before `body_hash`
326    // computation, but `compute_body_hash` decomposes each fused op
327    // back to its primitive form on hash — so closure identity (#222)
328    // is invariant under this pass and the order doesn't matter.
329    //
330    // Slices run sequentially: slice 2 looks for slice-1 output
331    // followed by a StoreLocal, so it must follow slice 1. Slice 3
332    // (LoadLocal + LoadLocal + IntAdd) is disjoint from both — its
333    // second slot is LoadLocal, not PushConst — so it can run in
334    // either order. Run it last to keep the slice 1/2 contract
335    // (slice 2 expects to see slice-1 output) untouched. Slice 4 is
336    // slice 3 for IntSub / IntMul (same pattern, different terminator);
337    // disjoint from every prior slice because the terminator op
338    // disambiguates, so order between slice 3 and slice 4 is free.
339    for f in p.functions.iter_mut() {
340        apply_peephole(&mut f.code, &pool.pool);
341        apply_peephole_slice2(&mut f.code);
342        apply_peephole_slice3(&mut f.code);
343        apply_peephole_slice4(&mut f.code);
344        // Slice 5 — jump-aware fusion of the loop-condition idiom
345        // (LoadLocal + LoadLocal/PushConst + IntLt + JumpIfNot).
346        // Runs after slices 3/4 because their 3-slot windows
347        // overlap slice 5's 4-slot window at position 0 and 1; if
348        // slice 3 fired first and consumed `LoadLocal + LoadLocal +
349        // IntAdd`, the `IntLt + JumpIfNot` that follows would not
350        // be a fusion candidate. Since slice 3's terminator is
351        // `IntAdd` and slice 5's is `IntLt`, the two don't compete
352        // on the same site — order between them is technically free
353        // but conventionally slice N runs after slice N-1.
354        apply_peephole_slice5(&mut f.code, &pool.pool);
355        // Slice 6 — absorb the match-scrutinee dance
356        // (`LoadLocal + StoreLocal` immediately preceding a slice-5
357        // fused op that reads the just-stored local). Must run after
358        // slice 5 since it matches on slice 5's output.
359        apply_peephole_slice6(&mut f.code);
360        // Slice 7/8 — fuse `LoadLocal + GetField + IntAdd|IntSub|IntMul`,
361        // the accumulator-with-field-read idiom. Disjoint from every
362        // earlier slice (only this one matches a GetField at slot 1),
363        // so order is independent — placed near the end for chronology.
364        apply_peephole_slice7(&mut f.code);
365        // Slice 9 — fuse the *remaining* bare `LoadLocal + GetField`
366        // pairs (those slice 7/8 didn't consume): chain-head field
367        // reads (`r.x` in `r.x + r.y`), standalone `r.field` reads
368        // (`r.total`), and field reads feeding non-add/sub/mul ops.
369        // MUST run after slice 7/8 — otherwise it would greedily eat
370        // the `LoadLocal + GetField` prefix of an `acc OP r.field`
371        // triple and prevent the 3-op fusion. Slice 7/8's tombstone
372        // GetFields are preceded by their fused op (not a bare
373        // LoadLocal), so slice 9 never matches them.
374        apply_peephole_slice9(&mut f.code);
375    }
376
377    // Final pass: stamp every function with its content hash now that
378    // every body is finalized (#222). Trampolines installed via
379    // `install_trampoline` already have it; recomputing is cheap and
380    // makes the invariant easier to read at this top level.
381    for f in p.functions.iter_mut() {
382        if f.body_hash == crate::program::ZERO_BODY_HASH {
383            f.body_hash = crate::program::compute_body_hash(
384                f.arity, f.locals_count, &f.code, &pool.record_shapes);
385        }
386    }
387
388    p.constants = pool.pool;
389    p.record_shapes = pool.record_shapes;
390    p
391}
392
393/// Peephole pass: rewrite fusable opcode patterns into superinstructions
394/// (#461). Each fused op claims its own slot in the code stream; the
395/// trailing primitive ops it absorbs stay in place as inert
396/// "tombstones" — the dispatch loop overrides its default `pc += 1`
397/// to step past them. Leaving the tombstones in place keeps
398/// `code.len()` invariant and means we don't have to renumber jump
399/// offsets.
400///
401/// Pattern (slice 1): `LoadLocal(i), PushConst(c), IntAdd` where
402/// `constants[c]` is a `Const::Int`. Fused to
403/// `LoadLocalAddIntConst { local_idx: i, imm_const_idx: c }`.
404/// Safety: the second and third slots must not be reachable from
405/// any Jump / JumpIf / JumpIfNot — otherwise a jump would land on a
406/// tombstone instead of the live op the source intended. The
407/// pre-pass below collects every jump target in the function and
408/// skips fusion sites whose tombstones overlap one.
409/// #464 step 2 — rewrite `MakeRecord` to `AllocStackRecord` at sites
410/// the escape analysis (`crate::escape::build_escape_index`) proved
411/// non-escaping. Each rewrite is a single-slot swap that preserves
412/// pc, stack delta, and shape semantics — jump targets, the peephole
413/// passes downstream, and the body-hash decoder all see the same
414/// program shape they would have seen for the unlowered code.
415///
416/// Sites that escape are left as-is and still incur the
417/// IndexMap-backed heap allocation. Step 3 of #464 carries the
418/// bench acceptance bars (≥1.5× speedup on `response_build`); this
419/// pass is the precondition.
420fn apply_escape_lowering(
421    code: &mut [Op],
422    fn_name: &str,
423    escape_index: &std::collections::HashMap<(String, u32), bool>,
424) {
425    for (pc, op) in code.iter_mut().enumerate() {
426        // Look up this (fn, pc) in the escape index. Absent → analysis
427        // didn't observe the site (defensive: leave on heap path).
428        // Present and false → safe to stack-allocate. Each rewrite is a
429        // single-slot swap preserving pc / stack delta, so jump
430        // targets, downstream peephole passes, and the body-hash
431        // decoder all see the same program shape.
432        let key = (fn_name.to_string(), pc as u32);
433        if !matches!(escape_index.get(&key), Some(false)) {
434            continue;
435        }
436        match *op {
437            Op::MakeRecord { shape_idx, field_count } => {
438                *op = Op::AllocStackRecord { shape_idx, field_count };
439            }
440            // #464 tuple codegen: same single-slot swap as records.
441            Op::MakeTuple(arity) => {
442                *op = Op::AllocStackTuple { arity };
443            }
444            _ => {}
445        }
446    }
447}
448
449/// #463 slice 2b-i — rewrite `MakeRecord` / `MakeTuple` to the arena
450/// variants at sites the request-scope analysis
451/// (`crate::arena::build_arena_index`) proved do not escape the
452/// active `EffectHandler` arena scope.
453///
454/// Only fires on **remaining** `MakeRecord` / `MakeTuple` sites — the
455/// stack pass (`apply_escape_lowering`) runs first and converts the
456/// non-frame-escaping cheaper-tier sites. Sites that escape both the
457/// frame *and* the request stay as `MakeRecord` / `MakeTuple` (heap),
458/// untouched.
459///
460/// Each rewrite is the same single-slot swap as the stack lowering:
461/// pc / stack delta / shape semantics preserved, jump targets and
462/// downstream peephole passes see the same program shape, and
463/// `compute_body_hash` (#222) decodes both arena ops back to their
464/// legacy `MakeRecord` / `MakeTuple` form so closure identity is
465/// invariant.
466fn apply_arena_lowering(
467    code: &mut [Op],
468    fn_name: &str,
469    arena_index: &std::collections::HashMap<(String, u32), bool>,
470) {
471    for (pc, op) in code.iter_mut().enumerate() {
472        // arena_index value: true = arena-eligible. Absent or false
473        // → leave on heap (defensive default; absent means the
474        // analysis didn't observe the site).
475        let key = (fn_name.to_string(), pc as u32);
476        if !matches!(arena_index.get(&key), Some(true)) {
477            continue;
478        }
479        match *op {
480            Op::MakeRecord { shape_idx, field_count } => {
481                *op = Op::AllocArenaRecord { shape_idx, field_count };
482            }
483            Op::MakeTuple(arity) => {
484                *op = Op::AllocArenaTuple { arity };
485            }
486            _ => {}
487        }
488    }
489}
490
491fn apply_peephole(code: &mut [Op], constants: &[Const]) {
492    if code.len() < 3 { return; }
493    let jump_targets = collect_jump_targets(code);
494
495    let n = code.len();
496    let mut k = 0;
497    while k + 2 < n {
498        if let (Op::LoadLocal(local_idx), Op::PushConst(imm_const_idx), Op::IntAdd)
499            = (code[k], code[k + 1], code[k + 2])
500        {
501            let imm_is_int = matches!(
502                constants.get(imm_const_idx as usize),
503                Some(Const::Int(_))
504            );
505            // Tombstones at k+1 and k+2 must not be jump targets;
506            // k itself can be a target (it stays a live op — the
507            // fused form executes the same semantics in one step).
508            let safe = imm_is_int
509                && !jump_targets.contains(&(k + 1))
510                && !jump_targets.contains(&(k + 2));
511            if safe {
512                code[k] = Op::LoadLocalAddIntConst { local_idx, imm_const_idx };
513                k += 3;
514                continue;
515            }
516        }
517        k += 1;
518    }
519}
520
521/// Slice 2: fuse `[LoadLocalAddIntConst, _, _, StoreLocal(dest)]`
522/// into `LoadLocalAddIntConstStoreLocal { src, imm_const_idx, dest }`.
523/// The two `_` slots are slice-1 tombstones (the original PushConst
524/// and IntAdd) and stay in place as slice-2 tombstones too. The
525/// dispatch loop advances pc by 4 past all three trailing slots
526/// after executing the fused op.
527fn apply_peephole_slice2(code: &mut [Op]) {
528    if code.len() < 4 { return; }
529    let jump_targets = collect_jump_targets(code);
530
531    let n = code.len();
532    let mut k = 0;
533    while k + 3 < n {
534        if let (
535            Op::LoadLocalAddIntConst { local_idx: src, imm_const_idx },
536            _,
537            _,
538            Op::StoreLocal(dest),
539        ) = (code[k], code[k + 1], code[k + 2], code[k + 3])
540        {
541            // Slice-1 contract: code[k+1] is the original
542            // PushConst(imm_const_idx) and code[k+2] is the
543            // original IntAdd. We don't re-verify those — slice 1
544            // is the only producer of LoadLocalAddIntConst and
545            // always leaves the contract intact.
546            //
547            // Safety: tombstones at k+1..k+3 must not be reachable
548            // from any jump. k itself can be (it's still a live
549            // op carrying the same semantics).
550            let safe = !jump_targets.contains(&(k + 1))
551                && !jump_targets.contains(&(k + 2))
552                && !jump_targets.contains(&(k + 3));
553            if safe {
554                code[k] = Op::LoadLocalAddIntConstStoreLocal {
555                    src,
556                    imm_const_idx,
557                    dest,
558                };
559                k += 4;
560                continue;
561            }
562        }
563        k += 1;
564    }
565}
566
567/// Slice 3: fuse `[LoadLocal(lhs), LoadLocal(rhs), IntAdd]` into
568/// `LoadLocalAddLocal { lhs_idx, rhs_idx }`. The binary-op-on-two-
569/// locals idiom: any `a + b` where both operands compile to a
570/// `LoadLocal` (typed `Int`). Mirrors slice 1's shape exactly — the
571/// trailing `LoadLocal` + `IntAdd` stay in place as inert tombstones
572/// with cancelling stack deltas (+1, -1), so the verifier and
573/// body-hash decoder both keep walking them as live.
574///
575/// Disjoint from slice 1: the second slot disambiguates (LoadLocal
576/// vs PushConst), so a site can match at most one of the two. Runs
577/// after slice 2 so we don't accidentally consume a `LoadLocal` slot
578/// that slice 2 was about to fuse into a `*StoreLocal` superop (and
579/// to keep slice 2's input contract — slice-1 output followed by
580/// StoreLocal — untouched).
581///
582/// Safety: like slice 1, the trailing two slots must not be jump
583/// targets. The first slot can be a target (it stays a live op).
584fn apply_peephole_slice3(code: &mut [Op]) {
585    if code.len() < 3 { return; }
586    let jump_targets = collect_jump_targets(code);
587
588    let n = code.len();
589    let mut k = 0;
590    while k + 2 < n {
591        if let (Op::LoadLocal(lhs_idx), Op::LoadLocal(rhs_idx), Op::IntAdd)
592            = (code[k], code[k + 1], code[k + 2])
593        {
594            let safe = !jump_targets.contains(&(k + 1))
595                && !jump_targets.contains(&(k + 2));
596            if safe {
597                code[k] = Op::LoadLocalAddLocal { lhs_idx, rhs_idx };
598                k += 3;
599                continue;
600            }
601        }
602        k += 1;
603    }
604}
605
606/// Slice 4: slice 3 for `IntSub` and `IntMul`. Fuses
607/// `[LoadLocal(lhs), LoadLocal(rhs), IntSub]` to
608/// `LoadLocalSubLocal { lhs_idx, rhs_idx }` and the `IntMul` shape
609/// to `LoadLocalMulLocal`. Same tombstone, jump-safety, and
610/// body-hash story as slice 3 — the trailing two slots stay as
611/// inert primitives with cancelling stack deltas.
612///
613/// Disjoint from every prior slice: slice 1/2 require a `PushConst`
614/// at slot 2 (here it's `LoadLocal`), and slice 3's terminator is
615/// `IntAdd` (here it's `IntSub` / `IntMul`). A given site matches at
616/// most one slice.
617fn apply_peephole_slice4(code: &mut [Op]) {
618    if code.len() < 3 { return; }
619    let jump_targets = collect_jump_targets(code);
620
621    let n = code.len();
622    let mut k = 0;
623    while k + 2 < n {
624        if let (Op::LoadLocal(lhs_idx), Op::LoadLocal(rhs_idx), terminator)
625            = (code[k], code[k + 1], code[k + 2])
626        {
627            let fused = match terminator {
628                Op::IntSub => Some(Op::LoadLocalSubLocal { lhs_idx, rhs_idx }),
629                Op::IntMul => Some(Op::LoadLocalMulLocal { lhs_idx, rhs_idx }),
630                _ => None,
631            };
632            if let Some(fused_op) = fused {
633                let safe = !jump_targets.contains(&(k + 1))
634                    && !jump_targets.contains(&(k + 2));
635                if safe {
636                    code[k] = fused_op;
637                    k += 3;
638                    continue;
639                }
640            }
641        }
642        k += 1;
643    }
644}
645
646/// Slice 5: fuse the loop-condition idiom — 4-slot window
647/// `[LoadLocal, LoadLocal|PushConst, IntLt, JumpIfNot(offset)]` —
648/// into `LoadLocalLtLocalJumpIfNot` or `LoadLocalLtIntConstJumpIfNot`.
649/// First jump-aware peephole in this codebase: the fused op carries
650/// the JumpIfNot's offset and the VM dispatches directly to either
651/// `pc + 4` (condition true, fall through past tombstones) or
652/// `pc + 4 + offset` (condition false, original JumpIfNot target).
653///
654/// Safety conditions, on top of slice 1's "tombstones must not be
655/// jump targets":
656/// 1. Trailing 3 slots (k+1, k+2, k+3) must not be jump targets from
657///    elsewhere — same as slice 1/3/4, just three of them.
658/// 2. The slot at k+3 (JumpIfNot) is the one whose offset we copy
659///    into the fused op. The offset is relative to the JumpIfNot's
660///    `pc + 1` which equals `k + 4`, so the resolved target is
661///    `k + 4 + offset` — that target must be safe to land on (it
662///    already is, since JumpIfNot is operating as designed).
663/// 3. The const-int branch checks the PushConst points at a
664///    `Const::Int` — same as slice 1.
665fn apply_peephole_slice5(code: &mut [Op], constants: &[Const]) {
666    if code.len() < 4 { return; }
667    let jump_targets = collect_jump_targets(code);
668
669    let n = code.len();
670    let mut k = 0;
671    while k + 3 < n {
672        // Match the lhs slot — always a LoadLocal.
673        let lhs_idx = match code[k] {
674            Op::LoadLocal(i) => i,
675            _ => { k += 1; continue; }
676        };
677        // Match the rhs slot — either LoadLocal or PushConst(Int).
678        // The two flavors emit different fused ops.
679        let fused = match (code[k + 1], code[k + 2], code[k + 3]) {
680            (Op::PushConst(imm_const_idx), Op::IntEq, Op::JumpIfNot(jump_offset))
681                if matches!(constants.get(imm_const_idx as usize), Some(Const::Int(_))) =>
682                Some(Op::LoadLocalEqIntConstJumpIfNot {
683                    local_idx: lhs_idx, imm_const_idx, jump_offset,
684                }),
685            _ => None,
686        };
687        if let Some(fused_op) = fused {
688            let safe = !jump_targets.contains(&(k + 1))
689                && !jump_targets.contains(&(k + 2))
690                && !jump_targets.contains(&(k + 3));
691            if safe {
692                code[k] = fused_op;
693                k += 4;
694                continue;
695            }
696        }
697        k += 1;
698    }
699}
700
701/// Slice 6: fuse the match-scrutinee dance preceding a slice-5
702/// pattern-match arm test. 3-slot window
703/// `[LoadLocal(src), StoreLocal(dst),
704///   LoadLocalEqIntConstJumpIfNot { local_idx: dst, ... }]` —
705/// where the slice-5 op's `local_idx` matches the StoreLocal's
706/// destination — rewrites to
707/// `LoadLocalStoreEqIntConstJumpIfNot { src, dst, ... }` at slot k.
708/// The fused op carries `dst` so it can mirror the original
709/// StoreLocal (later arm tests in the same match keep reading
710/// `locals[dst]`).
711///
712/// Trailing tombstones: 5 slots (the original StoreLocal + the
713/// slice-5 fused op itself + slice 5's 3 primitive tombstones).
714/// VM dispatch skips them via `pc + 6`; verifier override pushes
715/// `(pc + 6, ...)` and the branch target `(pc + 6 + jump_offset, ...)`
716/// — the offset is identical to what slice 5 stored (still relative
717/// to the original JumpIfNot's `pc + 1`, now at `k + 5 + 1 = k + 6`).
718///
719/// Safety: slots k+1..=k+5 must not be jump targets — same window
720/// safety as the other slices. Slice 5 already verified k+3..=k+5
721/// weren't jump targets when it fused; slice 6 only needs to re-check
722/// k+1 (the StoreLocal) and k+2 (the slice-5 fused op).
723fn apply_peephole_slice6(code: &mut [Op]) {
724    if code.len() < 3 { return; }
725    let jump_targets = collect_jump_targets(code);
726
727    let n = code.len();
728    let mut k = 0;
729    while k + 2 < n {
730        if let (
731            Op::LoadLocal(src),
732            Op::StoreLocal(dst),
733            Op::LoadLocalEqIntConstJumpIfNot { local_idx, imm_const_idx, jump_offset },
734        ) = (code[k], code[k + 1], code[k + 2]) {
735            // The slice-5 op must read the very local the StoreLocal
736            // just wrote; if it reads some other local this isn't the
737            // match-scrutinee idiom (could be a coincidental sequence).
738            if local_idx == dst {
739                let safe = !jump_targets.contains(&(k + 1))
740                    && !jump_targets.contains(&(k + 2));
741                if safe {
742                    code[k] = Op::LoadLocalStoreEqIntConstJumpIfNot {
743                        src, dst, imm_const_idx, jump_offset,
744                    };
745                    // Skip past this slice-6 window. The slice-5
746                    // tombstones at k+3..=k+5 are already handled by
747                    // slice 5's earlier rewrite; we don't need to
748                    // touch them.
749                    k += 3;
750                    continue;
751                }
752            }
753        }
754        k += 1;
755    }
756}
757
758/// Slice 7/8: fuse `[LoadLocal(local_idx), GetField{name_idx,
759/// site_idx}, IntAdd|IntSub|IntMul]` into the matching
760/// `LoadLocalGetField{Add,Sub,Mul} { local_idx, name_idx, site_idx }`.
761///
762/// Fires on the `acc OP r.field` accumulator-with-field-read idiom —
763/// the bytecode the compiler emits for `prev_expr OP record.field`
764/// once `prev_expr` is on the stack. Common in handler-shaped code
765/// like `r.x + r.y + r.z` (the LHS of each operator after the first
766/// matches this pattern), `acc + items[i].weight` reductions, and
767/// the `v.l - v.m` / `v.h * v.k` mixes the `response_build` profile
768/// exercises.
769///
770/// Disjoint from every prior slice: slice 1 wants `PushConst` at
771/// slot 1; slices 3-4 want `LoadLocal` at slot 1; slice 5 wants
772/// a 4-slot window with `IntEq + JumpIfNot` terminator. Only this
773/// slice matches a `GetField` at slot 1.
774///
775/// Order: must run after slice 4 (so the disjointness analysis
776/// holds — slice 3/4 patterns with a trailing IntAdd / IntSub /
777/// IntMul never carry a GetField at slot 1 and don't compete);
778/// must run before / independent of slice 5/6, which don't match
779/// any slot in this window.
780///
781/// Safety: trailing two slots (the original `GetField` and the
782/// arithmetic op) must not be jump targets. The first slot can be.
783fn apply_peephole_slice7(code: &mut [Op]) {
784    if code.len() < 3 { return; }
785    let jump_targets = collect_jump_targets(code);
786
787    let n = code.len();
788    let mut k = 0;
789    while k + 2 < n {
790        if let (Op::LoadLocal(local_idx), Op::GetField { name_idx, site_idx })
791            = (code[k], code[k + 1])
792        {
793            let fused = match code[k + 2] {
794                Op::IntAdd => Some(Op::LoadLocalGetFieldAdd { local_idx, name_idx, site_idx }),
795                Op::IntSub => Some(Op::LoadLocalGetFieldSub { local_idx, name_idx, site_idx }),
796                Op::IntMul => Some(Op::LoadLocalGetFieldMul { local_idx, name_idx, site_idx }),
797                _ => None,
798            };
799            if let Some(op) = fused {
800                let safe = !jump_targets.contains(&(k + 1))
801                    && !jump_targets.contains(&(k + 2));
802                if safe {
803                    code[k] = op;
804                    k += 3;
805                    continue;
806                }
807            }
808        }
809        k += 1;
810    }
811}
812
813/// Slice 9: fuse the bare `[LoadLocal(local_idx), GetField{name_idx,
814/// site_idx}]` pair into `LoadLocalGetField { local_idx, name_idx,
815/// site_idx }` — the plain `record.field` read, the most common
816/// field-access shape.
817///
818/// The win is allocation, not just one fewer dispatch: the unfused
819/// pair clones the entire record onto the value stack (a
820/// `Box<IndexMap>` for a heap record) only to read one field; the
821/// fused op reads the field out of the local by reference and clones
822/// only that value. On `response_build` the whole-record clone of the
823/// returned `Response` (`r.total`) was the dominant malloc source.
824///
825/// Order: MUST run after slice 7/8. Those fuse `[LoadLocal, GetField,
826/// IntAdd|IntSub|IntMul]`; if slice 9 ran first it would consume the
827/// `LoadLocal + GetField` prefix and block the 3-op fusion. After
828/// slice 7/8, the only remaining `[LoadLocal, GetField]` pairs are
829/// the ones they didn't want (chain heads, standalone reads, field
830/// reads feeding other ops). Slice 7/8's tombstone GetFields sit
831/// after their fused op, never after a bare `LoadLocal`, so slice 9
832/// won't touch them.
833///
834/// Safety: the trailing slot (the original `GetField`) must not be a
835/// jump target. The first slot can be.
836fn apply_peephole_slice9(code: &mut [Op]) {
837    if code.len() < 2 { return; }
838    let jump_targets = collect_jump_targets(code);
839
840    let n = code.len();
841    let mut k = 0;
842    while k + 1 < n {
843        if let (Op::LoadLocal(local_idx), Op::GetField { name_idx, site_idx })
844            = (code[k], code[k + 1])
845        {
846            if !jump_targets.contains(&(k + 1)) {
847                code[k] = Op::LoadLocalGetField { local_idx, name_idx, site_idx };
848                k += 2;
849                continue;
850            }
851        }
852        k += 1;
853    }
854}
855
856fn collect_jump_targets(code: &[Op]) -> std::collections::HashSet<usize> {
857    let mut targets = std::collections::HashSet::new();
858    for (pc, op) in code.iter().enumerate() {
859        let off = match op {
860            Op::Jump(off) | Op::JumpIf(off) | Op::JumpIfNot(off) => Some(*off),
861            _ => None,
862        };
863        if let Some(off) = off {
864            let target = (pc as i32 + 1 + off) as usize;
865            targets.insert(target);
866        }
867    }
868    targets
869}
870
871#[derive(Debug, Clone)]
872struct PendingLambda {
873    fn_id: u32,
874    /// Names of captured outer-scope locals, in order.
875    capture_names: Vec<String>,
876    params: Vec<a::Param>,
877    body: a::CExpr,
878}
879
880struct FnCompiler<'a> {
881    code: Vec<Op>,
882    locals: IndexMap<String, u16>,
883    next_local: u16,
884    /// Peak local usage seen during compilation (for VM frame sizing).
885    peak_local: u16,
886    /// Inferred numeric type of each local for typed numeric-op
887    /// lowering (#461). Populated when binding function parameters
888    /// (from their declared `TypeExpr::Named { name: "Int", .. }`
889    /// or `"Float"`) and when binding `let name := value` where
890    /// the RHS classifies statically. Used by `compile_binop` to
891    /// emit `Op::IntAdd` / `Op::FloatAdd` instead of the
892    /// polymorphic `Op::NumAdd` when both operands' types are
893    /// statically known. Conservative: falls back to `NumTy::Unknown`
894    /// (and the polymorphic op) whenever a type isn't locally
895    /// derivable.
896    ///
897    /// Keyed by local *name* (parallel to `locals`) rather than by
898    /// slot index so shadowed bindings are handled correctly via
899    /// `IndexMap`'s insertion-order semantics.
900    local_types: IndexMap<String, NumTy>,
901    /// Per-local map of statically-known field types (#461 slice 7).
902    /// Populated when a local is bound from a `RecordLit` whose
903    /// fields all classify to non-`Unknown` `NumTy`s. Lets
904    /// `classify_expr(FieldAccess { value: Var(name), field })`
905    /// return a precise `NumTy` instead of falling back to
906    /// `Unknown` — which in turn unlocks the typed-Add lowering
907    /// (`+` over two Ints → `IntAdd`) on `r.field + r.field`
908    /// chains, which slice 7 then fuses into
909    /// `LoadLocalGetFieldAdd`.
910    ///
911    /// Only the literal-binding case is tracked here; annotated
912    /// `let r :: R := ...` would require resolving the type alias
913    /// `R` to its field-type map, which the compiler doesn't yet
914    /// have. Future slice work.
915    local_record_field_types: IndexMap<String, IndexMap<String, NumTy>>,
916    /// Per-function counter for `Op::GetField` site indices (#462
917    /// slice 1). Each `Op::GetField` emit allocates the next index
918    /// here, giving every field-access site within this function a
919    /// stable identifier independent of pc. The VM uses
920    /// `(fn_id, site_idx)` as the inline-cache key, so the cache
921    /// survives the future dispatch rewrite (#461) and a JIT (#465).
922    field_get_sites: u32,
923    pool: &'a mut ConstPool,
924    function_names: &'a IndexMap<String, u32>,
925    module_aliases: &'a IndexMap<String, String>,
926    /// CExpr address → NodeId, populated per stage via `lex_ast::expr_ids`.
927    id_map: &'a std::collections::HashMap<*const a::CExpr, lex_ast::NodeId>,
928    /// Queue of lambdas discovered during compilation; each gets a fresh
929    /// fn_id and is compiled in a later pass.
930    pending_lambdas: &'a mut Vec<PendingLambda>,
931    /// Mutable view of the function table — used to allocate fn_ids for
932    /// freshly-discovered lambdas.
933    next_fn_id: &'a mut Vec<Function>,
934}
935
936/// Lightweight numeric-type classification used by `compile_binop`
937/// to decide whether to emit `IntAdd` / `FloatAdd` (specialized,
938/// fast) or `NumAdd` (polymorphic, runtime-typed dispatch). #461
939/// typed-lowering pass — conservative: anything not provably one
940/// of these returns `Unknown` and falls back to the polymorphic op.
941#[derive(Debug, Clone, Copy, PartialEq, Eq)]
942enum NumTy { Int, Float, Unknown }
943
944/// #461 slice 7: extract a `field_name -> NumTy` map from a record
945/// type expression. Resolves named types (`r :: R`) via
946/// `type_aliases` and returns `None` if the type ultimately isn't
947/// a record literal.
948fn record_field_types(
949    ty: &a::TypeExpr,
950    type_aliases: &IndexMap<String, a::TypeExpr>,
951) -> Option<IndexMap<String, NumTy>> {
952    match ty {
953        a::TypeExpr::Record { fields } => {
954            let mut m = IndexMap::new();
955            for f in fields {
956                m.insert(f.name.clone(), classify_type_expr(&f.ty));
957            }
958            Some(m)
959        }
960        a::TypeExpr::Refined { base, .. } => record_field_types(base, type_aliases),
961        a::TypeExpr::Named { name, args } if args.is_empty() => {
962            // Resolve the alias and recurse. Cycle protection isn't
963            // needed here — a cyclic type alias would have been
964            // rejected by `lex-types::check_program` upstream.
965            type_aliases.get(name).and_then(|t| record_field_types(t, type_aliases))
966        }
967        _ => None,
968    }
969}
970
971fn classify_type_expr(ty: &a::TypeExpr) -> NumTy {
972    match ty {
973        a::TypeExpr::Named { name, args } if args.is_empty() => match name.as_str() {
974            "Int" => NumTy::Int,
975            "Float" => NumTy::Float,
976            _ => NumTy::Unknown,
977        },
978        // `Refined { base, .. }` (#209) — classify by the base type;
979        // the refinement predicate doesn't change the value's primitive shape.
980        a::TypeExpr::Refined { base, .. } => classify_type_expr(base),
981        _ => NumTy::Unknown,
982    }
983}
984
985impl<'a> FnCompiler<'a> {
986    fn alloc_local(&mut self, name: &str) -> u16 {
987        let i = self.next_local;
988        self.locals.insert(name.into(), i);
989        self.next_local += 1;
990        if self.next_local > self.peak_local { self.peak_local = self.next_local; }
991        i
992    }
993    fn emit(&mut self, op: Op) { self.code.push(op); }
994
995    fn compile_expr(&mut self, e: &a::CExpr, tail: bool) {
996        match e {
997            a::CExpr::Literal { value } => self.compile_lit(value),
998            a::CExpr::Var { name } => {
999                if let Some(slot) = self.locals.get(name) {
1000                    self.emit(Op::LoadLocal(*slot));
1001                } else if let Some(&fn_id) = self.function_names.get(name) {
1002                    // Function name used as a *value* (e.g. as a record-field
1003                    // initializer or fold-callback arg) — materialize it as a
1004                    // closure with no captures. The runtime already accepts
1005                    // `Value::Closure { fn_id, captures: vec![] }` and
1006                    // `CallClosure` dispatches it. (#169)
1007                    self.emit(Op::MakeClosure { fn_id, capture_count: 0 });
1008                } else {
1009                    // Should be caught at type-check time; the type checker
1010                    // walks every Var. If we land here it's a compiler bug,
1011                    // not a user typo.
1012                    panic!("unknown var in compiler: {name}");
1013                }
1014            }
1015            a::CExpr::Let { name, ty, value, body } => {
1016                // Classify the RHS for typed-op lowering (#461). Prefer
1017                // the declared annotation when present (cheap O(1)
1018                // lookup); fall back to classifying the value
1019                // expression structurally.
1020                let nty = match ty {
1021                    Some(t) => classify_type_expr(t),
1022                    None => self.classify_expr(value),
1023                };
1024                // #461 slice 7: when the RHS is a record literal,
1025                // remember the field types so `name.field` accesses
1026                // downstream can classify precisely. Without this,
1027                // `r.x + r.y` falls through to `NumAdd`, blocking
1028                // the slice-7 fusion.
1029                if let a::CExpr::RecordLit { fields } = value.as_ref() {
1030                    let mut ftypes = IndexMap::new();
1031                    for f in fields {
1032                        let fty = self.classify_expr(&f.value);
1033                        ftypes.insert(f.name.clone(), fty);
1034                    }
1035                    self.local_record_field_types.insert(name.clone(), ftypes);
1036                }
1037                self.compile_expr(value, false);
1038                let slot = self.alloc_local(name);
1039                self.local_types.insert(name.clone(), nty);
1040                self.emit(Op::StoreLocal(slot));
1041                self.compile_expr(body, tail);
1042            }
1043            a::CExpr::Block { statements, result } => {
1044                for s in statements {
1045                    self.compile_expr(s, false);
1046                    self.emit(Op::Pop);
1047                }
1048                self.compile_expr(result, tail);
1049            }
1050            a::CExpr::Call { callee, args } => self.compile_call(e, callee, args, tail),
1051            a::CExpr::Constructor { name, args } => {
1052                for a in args { self.compile_expr(a, false); }
1053                let name_idx = self.pool.variant(name);
1054                self.emit(Op::MakeVariant { name_idx, arity: args.len() as u16 });
1055            }
1056            a::CExpr::Match { scrutinee, arms } => self.compile_match(scrutinee, arms, tail),
1057            a::CExpr::RecordLit { fields } => {
1058                let mut idxs = Vec::with_capacity(fields.len());
1059                for f in fields {
1060                    self.compile_expr(&f.value, false);
1061                    idxs.push(self.pool.field(&f.name));
1062                }
1063                let field_count = idxs.len() as u16;
1064                let shape_idx = self.pool.record_shape(idxs);
1065                self.emit(Op::MakeRecord { shape_idx, field_count });
1066            }
1067            a::CExpr::TupleLit { items } => {
1068                for it in items { self.compile_expr(it, false); }
1069                self.emit(Op::MakeTuple(items.len() as u16));
1070            }
1071            a::CExpr::ListLit { items } => {
1072                for it in items { self.compile_expr(it, false); }
1073                self.emit(Op::MakeList(items.len() as u32));
1074            }
1075            a::CExpr::FieldAccess { value, field } => {
1076                self.compile_expr(value, false);
1077                let name_idx = self.pool.field(field);
1078                let site_idx = self.field_get_sites;
1079                self.field_get_sites += 1;
1080                self.emit(Op::GetField { name_idx, site_idx });
1081            }
1082            a::CExpr::BinOp { op, lhs, rhs } => self.compile_binop(op, lhs, rhs),
1083            a::CExpr::UnaryOp { op, expr } => {
1084                self.compile_expr(expr, false);
1085                match op.as_str() {
1086                    "-" => self.emit(Op::NumNeg),
1087                    "not" => self.emit(Op::BoolNot),
1088                    other => panic!("unknown unary: {other}"),
1089                }
1090            }
1091            a::CExpr::Lambda { params, body, .. } => self.compile_lambda(params, body),
1092            a::CExpr::Return { value } => {
1093                self.compile_expr(value, true);
1094                self.emit(Op::Return);
1095            }
1096        }
1097    }
1098
1099    fn compile_lit(&mut self, l: &a::CLit) {
1100        let i = match l {
1101            a::CLit::Int { value } => self.pool.int(*value),
1102            a::CLit::Bool { value } => self.pool.bool(*value),
1103            a::CLit::Float { value } => {
1104                let f: f64 = value.parse().unwrap_or(0.0);
1105                self.pool.float(f)
1106            }
1107            a::CLit::Str { value } => self.pool.str(value),
1108            a::CLit::Bytes { value: _ } => {
1109                // Stub: M4 doesn't use bytes literals in §3.13 examples.
1110                let i = self.pool.pool.len() as u32;
1111                self.pool.pool.push(Const::Bytes(Vec::new()));
1112                i
1113            }
1114            a::CLit::Unit => self.pool.unit(),
1115        };
1116        self.emit(Op::PushConst(i));
1117    }
1118
1119    fn compile_call(&mut self, call_expr: &a::CExpr, callee: &a::CExpr, args: &[a::CExpr], tail: bool) {
1120        let node_id = self
1121            .id_map
1122            .get(&(call_expr as *const a::CExpr))
1123            .map(|n| n.as_str().to_string())
1124            .unwrap_or_else(|| "n_?".into());
1125        let node_id_idx = self.pool.node_id(&node_id);
1126
1127        // Module function call: `alias.op(args)` where `alias` is an imported
1128        // module ⇒ EffectCall, except for higher-order pure ops where we
1129        // emit inline bytecode using CallClosure (the closure-arg can't be
1130        // serialized through the effect handler).
1131        if let a::CExpr::FieldAccess { value, field } = callee {
1132            if let a::CExpr::Var { name } = value.as_ref() {
1133                if let Some(module) = self.module_aliases.get(name) {
1134                    if self.try_emit_higher_order(module, field, args, node_id_idx) {
1135                        let _ = tail;
1136                        return;
1137                    }
1138                    for a in args { self.compile_expr(a, false); }
1139                    let kind_idx = self.pool.str(module);
1140                    let op_idx = self.pool.str(field);
1141                    self.emit(Op::EffectCall {
1142                        kind_idx,
1143                        op_idx,
1144                        arity: args.len() as u16,
1145                        node_id_idx,
1146                    });
1147                    let _ = tail;
1148                    return;
1149                }
1150            }
1151        }
1152        match callee {
1153            a::CExpr::Var { name } if self.function_names.contains_key(name) => {
1154                for a in args { self.compile_expr(a, false); }
1155                let fn_id = self.function_names[name];
1156                if tail {
1157                    self.emit(Op::TailCall { fn_id, arity: args.len() as u16, node_id_idx });
1158                } else {
1159                    self.emit(Op::Call { fn_id, arity: args.len() as u16, node_id_idx });
1160                }
1161            }
1162            a::CExpr::Var { name } if self.locals.contains_key(name) => {
1163                // First-class function value bound to a local. Push the
1164                // closure, then args, then CallClosure.
1165                let slot = self.locals[name];
1166                self.emit(Op::LoadLocal(slot));
1167                for a in args { self.compile_expr(a, false); }
1168                self.emit(Op::CallClosure { arity: args.len() as u16, node_id_idx });
1169            }
1170            // Lambda directly applied — push closure + args + CallClosure.
1171            other => {
1172                self.compile_expr(other, false);
1173                for a in args { self.compile_expr(a, false); }
1174                self.emit(Op::CallClosure { arity: args.len() as u16, node_id_idx });
1175            }
1176        }
1177    }
1178
1179    fn compile_binop(&mut self, op: &str, lhs: &a::CExpr, rhs: &a::CExpr) {
1180        // #461 typed lowering: if we can statically prove both
1181        // operands are the same numeric type, emit the typed
1182        // primitive (`IntAdd` / `FloatAdd`) instead of the
1183        // polymorphic `NumAdd` that runtime-matches on operand
1184        // shape. The fast path skips one match per arithmetic op
1185        // *and* unblocks downstream peephole fusions (slice 1)
1186        // that scan for typed primitives. Conservative fallback
1187        // to the polymorphic op when either side classifies as
1188        // `Unknown`, so correctness for `Float` / mixed code is
1189        // unchanged.
1190        let lhs_ty = self.classify_expr(lhs);
1191        let rhs_ty = self.classify_expr(rhs);
1192        let typed = match (lhs_ty, rhs_ty) {
1193            (NumTy::Int, NumTy::Int) => NumTy::Int,
1194            (NumTy::Float, NumTy::Float) => NumTy::Float,
1195            _ => NumTy::Unknown,
1196        };
1197        self.compile_expr(lhs, false);
1198        self.compile_expr(rhs, false);
1199        match (op, typed) {
1200            ("+",  NumTy::Int)     => self.emit(Op::IntAdd),
1201            ("+",  NumTy::Float)   => self.emit(Op::FloatAdd),
1202            ("+",  NumTy::Unknown) => self.emit(Op::NumAdd),
1203            ("-",  NumTy::Int)     => self.emit(Op::IntSub),
1204            ("-",  NumTy::Float)   => self.emit(Op::FloatSub),
1205            ("-",  NumTy::Unknown) => self.emit(Op::NumSub),
1206            ("*",  NumTy::Int)     => self.emit(Op::IntMul),
1207            ("*",  NumTy::Float)   => self.emit(Op::FloatMul),
1208            ("*",  NumTy::Unknown) => self.emit(Op::NumMul),
1209            ("/",  NumTy::Int)     => self.emit(Op::IntDiv),
1210            ("/",  NumTy::Float)   => self.emit(Op::FloatDiv),
1211            ("/",  NumTy::Unknown) => self.emit(Op::NumDiv),
1212            // Int has %; Float doesn't (NumMod will reject at runtime).
1213            ("%",  NumTy::Int)     => self.emit(Op::IntMod),
1214            ("%",  _)              => self.emit(Op::NumMod),
1215            ("==", NumTy::Int)     => self.emit(Op::IntEq),
1216            ("==", NumTy::Float)   => self.emit(Op::FloatEq),
1217            ("==", NumTy::Unknown) => self.emit(Op::NumEq),
1218            ("!=", NumTy::Int)     => { self.emit(Op::IntEq);   self.emit(Op::BoolNot); }
1219            ("!=", NumTy::Float)   => { self.emit(Op::FloatEq); self.emit(Op::BoolNot); }
1220            ("!=", NumTy::Unknown) => { self.emit(Op::NumEq);   self.emit(Op::BoolNot); }
1221            ("<",  NumTy::Int)     => self.emit(Op::IntLt),
1222            ("<",  NumTy::Float)   => self.emit(Op::FloatLt),
1223            ("<",  NumTy::Unknown) => self.emit(Op::NumLt),
1224            ("<=", NumTy::Int)     => self.emit(Op::IntLe),
1225            ("<=", NumTy::Float)   => self.emit(Op::FloatLe),
1226            ("<=", NumTy::Unknown) => self.emit(Op::NumLe),
1227            (">",  NumTy::Int)     => { self.emit_swap_top2(); self.emit(Op::IntLt); }
1228            (">",  NumTy::Float)   => { self.emit_swap_top2(); self.emit(Op::FloatLt); }
1229            (">",  NumTy::Unknown) => { self.emit_swap_top2(); self.emit(Op::NumLt); }
1230            (">=", NumTy::Int)     => { self.emit_swap_top2(); self.emit(Op::IntLe); }
1231            (">=", NumTy::Float)   => { self.emit_swap_top2(); self.emit(Op::FloatLe); }
1232            (">=", NumTy::Unknown) => { self.emit_swap_top2(); self.emit(Op::NumLe); }
1233            ("and", _) => self.emit(Op::BoolAnd),
1234            ("or",  _) => self.emit(Op::BoolOr),
1235            (other, _) => panic!("unknown binop: {other:?}"),
1236        }
1237    }
1238
1239    /// Classify an expression's static numeric type for #461 typed
1240    /// lowering. Strictly conservative: only returns `Int` / `Float`
1241    /// when the type is locally derivable from a literal, an
1242    /// already-classified local, or a binary op on two same-typed
1243    /// operands. Everything else (function calls, field access,
1244    /// match expressions, ...) falls back to `Unknown` and the
1245    /// polymorphic NumAdd-family op.
1246    fn classify_expr(&self, e: &a::CExpr) -> NumTy {
1247        match e {
1248            a::CExpr::Literal { value: a::CLit::Int { .. } } => NumTy::Int,
1249            a::CExpr::Literal { value: a::CLit::Float { .. } } => NumTy::Float,
1250            a::CExpr::Var { name } =>
1251                self.local_types.get(name).copied().unwrap_or(NumTy::Unknown),
1252            a::CExpr::BinOp { op, lhs, rhs } => {
1253                // Numeric ops preserve the operand type (Int+Int=Int,
1254                // Float+Float=Float). Comparison/logical ops yield
1255                // Bool, not a numeric type — return Unknown.
1256                let is_numeric = matches!(op.as_str(), "+" | "-" | "*" | "/" | "%");
1257                if !is_numeric { return NumTy::Unknown; }
1258                match (self.classify_expr(lhs), self.classify_expr(rhs)) {
1259                    (NumTy::Int, NumTy::Int) => NumTy::Int,
1260                    (NumTy::Float, NumTy::Float) => NumTy::Float,
1261                    _ => NumTy::Unknown,
1262                }
1263            }
1264            a::CExpr::UnaryOp { op, expr } if op == "-" => self.classify_expr(expr),
1265            // #461 slice 7: `r.field` access where `r` is a local
1266            // bound from a record literal. Reads the per-local
1267            // field-type map populated at the let-binding site.
1268            // Unknown otherwise (record argument with `:: R`
1269            // annotation, helper-returned record, etc.) — those
1270            // would need type-alias resolution to classify.
1271            a::CExpr::FieldAccess { value, field } => {
1272                if let a::CExpr::Var { name } = value.as_ref() {
1273                    if let Some(ftypes) = self.local_record_field_types.get(name) {
1274                        return ftypes.get(field).copied().unwrap_or(NumTy::Unknown);
1275                    }
1276                }
1277                NumTy::Unknown
1278            }
1279            // Let-expressions: the let-binding mutates `local_types`
1280            // *during* compile_expr; classifying ahead of time would
1281            // require simulating that. Conservative fallback.
1282            _ => NumTy::Unknown,
1283        }
1284    }
1285
1286    fn emit_swap_top2(&mut self) {
1287        let a = self.alloc_local("__swap_a");
1288        let b = self.alloc_local("__swap_b");
1289        self.emit(Op::StoreLocal(b));
1290        self.emit(Op::StoreLocal(a));
1291        self.emit(Op::LoadLocal(b));
1292        self.emit(Op::LoadLocal(a));
1293    }
1294
1295    fn compile_match(&mut self, scrutinee: &a::CExpr, arms: &[a::Arm], tail: bool) {
1296        self.compile_expr(scrutinee, false);
1297        let scrut_slot = self.alloc_local("__scrut");
1298        self.emit(Op::StoreLocal(scrut_slot));
1299
1300        let mut end_jumps: Vec<usize> = Vec::new();
1301        for arm in arms {
1302            let arm_start_locals = self.next_local;
1303            let arm_start_locals_map = self.locals.clone();
1304
1305            self.emit(Op::LoadLocal(scrut_slot));
1306            let mut bindings: Vec<(String, u16)> = Vec::new();
1307            let fail_jumps: Vec<usize> = self.compile_pattern_test(&arm.pattern, &mut bindings);
1308
1309            self.compile_expr(&arm.body, tail);
1310            let j_end = self.code.len();
1311            self.emit(Op::Jump(0));
1312            end_jumps.push(j_end);
1313
1314            let fail_target = self.code.len() as i32;
1315            for j in fail_jumps {
1316                // #337: PConstructor patterns now register an
1317                // unconditional `Op::Jump` for the failure path
1318                // (alongside the existing `Op::JumpIfNot` from
1319                // PLiteral / nested constructor tests). Patch
1320                // either shape.
1321                match &mut self.code[j] {
1322                    Op::JumpIfNot(off) => *off = fail_target - (j as i32 + 1),
1323                    Op::Jump(off)      => *off = fail_target - (j as i32 + 1),
1324                    _ => {}
1325                }
1326            }
1327            self.next_local = arm_start_locals;
1328            self.locals = arm_start_locals_map;
1329        }
1330        let panic_msg_idx = self.pool.str("non-exhaustive match");
1331        self.emit(Op::Panic(panic_msg_idx));
1332
1333        let end_target = self.code.len() as i32;
1334        for j in end_jumps {
1335            if let Op::Jump(off) = &mut self.code[j] {
1336                *off = end_target - (j as i32 + 1);
1337            }
1338        }
1339    }
1340
1341    fn compile_pattern_test(&mut self, p: &a::Pattern, bindings: &mut Vec<(String, u16)>) -> Vec<usize> {
1342        let mut fails = Vec::new();
1343        match p {
1344            a::Pattern::PWild => { self.emit(Op::Pop); }
1345            a::Pattern::PVar { name } => {
1346                let slot = self.alloc_local(name);
1347                self.emit(Op::StoreLocal(slot));
1348                bindings.push((name.clone(), slot));
1349            }
1350            a::Pattern::PLiteral { value } => {
1351                self.compile_lit(value);
1352                match value {
1353                    a::CLit::Str { .. } => self.emit(Op::StrEq),
1354                    a::CLit::Bytes { .. } => self.emit(Op::BytesEq),
1355                    // Typed-lowering for numeric literal patterns
1356                    // (#461 slice 5 prerequisite). The pattern only
1357                    // reaches its test when the scrutinee has the
1358                    // literal's type (the type checker rejects
1359                    // mismatches), so emit the type-specific Eq.
1360                    // The body_hash decoder lowers IntEq/FloatEq to
1361                    // NumEq at hash time so closure identity (#222)
1362                    // is unchanged. Enables slice 5's
1363                    // `LoadLocal + PushConst + IntEq + JumpIfNot`
1364                    // peephole to fire on pattern-match arm tests.
1365                    a::CLit::Int { .. } => self.emit(Op::IntEq),
1366                    a::CLit::Float { .. } => self.emit(Op::FloatEq),
1367                    _ => self.emit(Op::NumEq),
1368                }
1369                let j = self.code.len();
1370                self.emit(Op::JumpIfNot(0));
1371                fails.push(j);
1372            }
1373            a::Pattern::PConstructor { name, args } => {
1374                let name_idx = self.pool.variant(name);
1375                // #337: the failure path must drop the duplicated
1376                // scrutinee so subsequent match arms see a clean
1377                // stack. The previous shape
1378                //   Dup; TestVariant; JumpIfNot(fail);
1379                // left `[scrut]` on the stack at the fail target,
1380                // poisoning later arms — e.g. a wildcard `_` arm
1381                // whose body referenced an unrelated value would
1382                // pop the leaked scrutinee instead of its own value.
1383                //
1384                // New shape: branch on success, fall through to a
1385                // failure cleanup that pops the dup'd scrutinee
1386                // before jumping. The registered fail-jump is an
1387                // unconditional `Op::Jump`; `compile_match`'s patch
1388                // loop accepts both `JumpIfNot` and `Jump`.
1389                self.emit(Op::Dup);                   // [scrut, scrut]
1390                self.emit(Op::TestVariant(name_idx)); // [scrut, Bool]
1391                let j_success = self.code.len();
1392                self.emit(Op::JumpIf(0));             // pop Bool. success → [scrut]
1393                self.emit(Op::Pop);                   // failure cleanup: [scrut] → []
1394                let j_fail = self.code.len();
1395                self.emit(Op::Jump(0));               // → fail target with []
1396                fails.push(j_fail);
1397                let success_target = self.code.len() as i32;
1398                if let Op::JumpIf(off) = &mut self.code[j_success] {
1399                    *off = success_target - (j_success as i32 + 1);
1400                }
1401                if args.is_empty() {
1402                    self.emit(Op::Pop);
1403                } else if args.len() == 1 {
1404                    self.emit(Op::GetVariantArg(0));
1405                    let sub_fails = self.compile_pattern_test(&args[0], bindings);
1406                    fails.extend(sub_fails);
1407                } else {
1408                    let slot = self.alloc_local("__variant");
1409                    self.emit(Op::StoreLocal(slot));
1410                    for (i, arg) in args.iter().enumerate() {
1411                        self.emit(Op::LoadLocal(slot));
1412                        self.emit(Op::GetVariantArg(i as u16));
1413                        let sub_fails = self.compile_pattern_test(arg, bindings);
1414                        fails.extend(sub_fails);
1415                    }
1416                }
1417            }
1418            a::Pattern::PRecord { fields } => {
1419                let slot = self.alloc_local("__record");
1420                self.emit(Op::StoreLocal(slot));
1421                for f in fields {
1422                    self.emit(Op::LoadLocal(slot));
1423                    let name_idx = self.pool.field(&f.name);
1424                    let site_idx = self.field_get_sites;
1425                    self.field_get_sites += 1;
1426                    self.emit(Op::GetField { name_idx, site_idx });
1427                    let sub_fails = self.compile_pattern_test(&f.pattern, bindings);
1428                    fails.extend(sub_fails);
1429                }
1430            }
1431            a::Pattern::PTuple { items } => {
1432                let slot = self.alloc_local("__tuple");
1433                self.emit(Op::StoreLocal(slot));
1434                for (i, item) in items.iter().enumerate() {
1435                    self.emit(Op::LoadLocal(slot));
1436                    self.emit(Op::GetElem(i as u16));
1437                    let sub_fails = self.compile_pattern_test(item, bindings);
1438                    fails.extend(sub_fails);
1439                }
1440            }
1441        }
1442        fails
1443    }
1444
1445    /// Compile a Lambda: collect free variables that resolve to outer-scope
1446    /// locals, register a synthetic function, emit MakeClosure with the
1447    /// captured values pushed in order.
1448    fn compile_lambda(&mut self, params: &[a::Param], body: &a::CExpr) {
1449        // Free vars = vars referenced in body that aren't bound locally.
1450        let mut bound: std::collections::HashSet<String> = params.iter().map(|p| p.name.clone()).collect();
1451        let mut frees: Vec<String> = Vec::new();
1452        free_vars(body, &mut bound, &mut frees);
1453
1454        // Filter to those that are in the enclosing locals (captures).
1455        // Don't exclude names that *also* exist in `function_names`:
1456        // if the name is in `locals`, the local shadows the global
1457        // within this scope, and the lambda needs to capture the
1458        // local's value, not the global fn. (#339) Names that are
1459        // ONLY in `function_names` (no local) stay external — the
1460        // lambda's body resolves them at call time, same as the
1461        // enclosing fn would.
1462        let captures: Vec<String> = frees.into_iter()
1463            .filter(|n| self.locals.contains_key(n))
1464            .collect();
1465
1466        // Allocate a fresh fn_id by appending a placeholder Function.
1467        let fn_id = self.next_fn_id.len() as u32;
1468        self.next_fn_id.push(Function {
1469            name: format!("__lambda_{fn_id}"),
1470            arity: (captures.len() + params.len()) as u16,
1471            locals_count: 0,
1472            code: Vec::new(),
1473            effects: Vec::new(),
1474            // See #222: filled in at the end of the compile pass.
1475            body_hash: crate::program::ZERO_BODY_HASH,
1476            // Lambdas don't carry refinements at the surface today
1477            // (closure params don't accept `Type{x | ...}` syntax in
1478            // the parser). #209 stays focused on top-level fn decls;
1479            // closure-param refinements are a follow-up.
1480            refinements: Vec::new(),
1481            // Lambda body hasn't been compiled yet; filled in by the
1482            // deferred lambda-compile pass after FnCompiler walks it.
1483            field_ic_sites: 0,
1484        });
1485
1486        // Emit code at the lambda site: load each captured local, then MakeClosure.
1487        for c in &captures {
1488            let slot = *self.locals.get(c).expect("free var must be in scope");
1489            self.emit(Op::LoadLocal(slot));
1490        }
1491        self.emit(Op::MakeClosure { fn_id, capture_count: captures.len() as u16 });
1492
1493        // Queue the body for later compilation.
1494        self.pending_lambdas.push(PendingLambda {
1495            fn_id,
1496            capture_names: captures,
1497            params: params.to_vec(),
1498            body: body.clone(),
1499        });
1500    }
1501
1502    /// Higher-order stdlib ops on Result/Option whose function arg is a
1503    /// closure. Emit inline: pattern-match on the variant, invoke the
1504    /// closure when applicable, return wrapped result.
1505    fn try_emit_higher_order(
1506        &mut self,
1507        module: &str,
1508        op: &str,
1509        args: &[a::CExpr],
1510        node_id_idx: u32,
1511    ) -> bool {
1512        match (module, op) {
1513            ("result", "map") => self.emit_variant_map(args, "Ok", true),
1514            ("result", "and_then") => self.emit_variant_map(args, "Ok", false),
1515            ("result", "map_err") => self.emit_variant_map(args, "Err", true),
1516            ("result", "or_else") => self.emit_variant_or_else(args, "Err", 1),
1517            ("option", "map") => self.emit_variant_map(args, "Some", true),
1518            ("option", "and_then") => self.emit_variant_map(args, "Some", false),
1519            ("option", "or_else") => self.emit_variant_or_else(args, "None", 0),
1520            ("option", "unwrap_or_else") => self.emit_option_unwrap_or_else(args),
1521            ("result", "unwrap_or_else") => self.emit_result_unwrap_or_else(args),
1522            ("list", "map") => self.emit_list_map(args),
1523            ("list", "par_map") => self.emit_list_par_map(args),
1524            ("list", "sort_by") => self.emit_list_sort_by(args),
1525            ("list", "filter") => self.emit_list_filter(args),
1526            ("list", "fold") => self.emit_list_fold(args),
1527            ("iter", "from_list") => self.emit_iter_from_list(args),
1528            ("iter", "unfold")    => self.emit_iter_unfold(args),
1529            ("iter", "next")      => self.emit_iter_next(args),
1530            ("iter", "is_empty")  => self.emit_iter_is_empty(args),
1531            ("iter", "count")     => self.emit_iter_count(args),
1532            ("iter", "take")      => self.emit_iter_take(args),
1533            ("iter", "skip")      => self.emit_iter_skip(args),
1534            ("iter", "to_list")   => self.emit_iter_to_list(args),
1535            ("iter", "collect")   => self.emit_iter_to_list(args),
1536            ("iter", "map")       => self.emit_iter_map(args),
1537            ("iter", "filter")    => self.emit_iter_filter(args),
1538            ("iter", "fold")      => self.emit_iter_fold(args),
1539            ("map", "fold") => self.emit_map_fold(args, node_id_idx),
1540            ("flow", "sequential") => self.emit_flow_sequential(args),
1541            ("flow", "branch") => self.emit_flow_branch(args),
1542            ("flow", "retry") => self.emit_flow_retry(args),
1543            ("flow", "retry_with_backoff") => self.emit_flow_retry_with_backoff(args),
1544            ("flow", "parallel") => self.emit_flow_parallel(args),
1545            ("flow", "parallel_list") => self.emit_flow_parallel_list(args),
1546            _ => return false,
1547        }
1548        true
1549    }
1550
1551    /// `list.map(xs, f)` — native map op (#464). Pushes `xs` then `f`
1552    /// and emits a single `Op::ListMap`. The previous inlined loop
1553    /// re-`LoadLocal`'d (cloned) the whole input and accumulator lists
1554    /// each iteration — O(n²); the native op owns the list and builds
1555    /// the result with one pre-sized allocation.
1556    fn emit_list_map(&mut self, args: &[a::CExpr]) {
1557        self.compile_expr(&args[0], false); // xs
1558        self.compile_expr(&args[1], false); // f
1559        let nid = self.pool.node_id("n_list_map");
1560        self.emit(Op::ListMap { node_id_idx: nid });
1561    }
1562
1563    /// `list.par_map(xs, f)` (#305 slice 1). Pushes `xs` and `f`,
1564    /// then emits a single `Op::ParallelMap` — the VM applies `f`
1565    /// to each element on OS-thread tasks, capped by
1566    /// `LEX_PAR_MAX_CONCURRENCY`. Returns the result list in input
1567    /// order.
1568    fn emit_list_par_map(&mut self, args: &[a::CExpr]) {
1569        self.compile_expr(&args[0], false);
1570        self.compile_expr(&args[1], false);
1571        let nid = self.pool.node_id("n_list_par_map");
1572        self.emit(Op::ParallelMap { node_id_idx: nid });
1573    }
1574
1575    /// `list.sort_by(xs, f)` (#338). Pushes `xs` and the key-fn
1576    /// `f`, then emits a single `Op::SortByKey` — the VM invokes
1577    /// `f` on each element to derive a sortable key, stable-sorts
1578    /// by key, and returns the values in sorted order. Keys must
1579    /// resolve to `Int` / `Float` / `Str`; mixed-type pairs are
1580    /// treated as equal by the comparator (preserving insertion
1581    /// order via the stable sort).
1582    fn emit_list_sort_by(&mut self, args: &[a::CExpr]) {
1583        self.compile_expr(&args[0], false);
1584        self.compile_expr(&args[1], false);
1585        let nid = self.pool.node_id("n_list_sort_by");
1586        self.emit(Op::SortByKey { node_id_idx: nid });
1587    }
1588
1589    /// `list.filter(xs, pred)` — native filter op (#464). Same
1590    /// rationale as `emit_list_map`.
1591    fn emit_list_filter(&mut self, args: &[a::CExpr]) {
1592        self.compile_expr(&args[0], false); // xs
1593        self.compile_expr(&args[1], false); // pred
1594        let nid = self.pool.node_id("n_list_filter");
1595        self.emit(Op::ListFilter { node_id_idx: nid });
1596    }
1597
1598    /// `list.fold(xs, init, f)` — native left-fold op (#464). Same
1599    /// rationale as `emit_list_map`. Stack: `[xs, init, f]`.
1600    fn emit_list_fold(&mut self, args: &[a::CExpr]) {
1601        self.compile_expr(&args[0], false); // xs
1602        self.compile_expr(&args[1], false); // init
1603        self.compile_expr(&args[2], false); // f
1604        let nid = self.pool.node_id("n_list_fold");
1605        self.emit(Op::ListFold { node_id_idx: nid });
1606    }
1607
1608    // ── Iter[T] operations (#364) ─────────────────────────────────────────
1609    // Internal representation: `Value::Variant("__IterEager", [list, idx])`
1610    // for the eager form (a List backing store + Int cursor) and
1611    // `Value::Variant("__IterLazy", [seed, step_closure])` for the lazy form
1612    // produced by `iter.unfold` (#376). Both are tagged variants so each op
1613    // can `TestVariant` at runtime to dispatch. The names start with `__` so
1614    // they can't be written by user code (uppercase ASCII-letter is required
1615    // for constructor names, and the underscores keep them out of the
1616    // user-namespace by convention).
1617
1618    /// `iter.from_list(xs)` — wrap a list in an eager iterator at position 0.
1619    fn emit_iter_from_list(&mut self, args: &[a::CExpr]) {
1620        self.compile_expr(&args[0], false);
1621        let zero = self.pool.int(0);
1622        self.emit(Op::PushConst(zero));
1623        let v = self.pool.variant("__IterEager");
1624        self.emit(Op::MakeVariant { name_idx: v, arity: 2 });
1625    }
1626
1627    /// `iter.next(it)` — advance one step; returns `Option[(T, Iter[T])]`.
1628    ///
1629    /// Dispatches on the iter's variant tag:
1630    /// - `__IterLazy(seed, step)` (#376) → invoke `step(seed)`. On
1631    ///   `Some((t, s'))` wrap as `Some((t, __IterLazy(s', step)))`; on
1632    ///   `None` propagate `None`. The seed advances forward each call.
1633    /// - `__IterCursor(handle)` (#379) → effect-call `sql.cursor_next(handle)`
1634    ///   which returns `Option[T]`. On `Some(row)` wrap as
1635    ///   `Some((row, __IterCursor(handle)))`; on `None` propagate. Handle
1636    ///   stays stable across calls — state is server-side / mpsc-buffered.
1637    /// - `__IterEager(list, idx)` → existing positional cursor.
1638    fn emit_iter_next(&mut self, args: &[a::CExpr]) {
1639        self.compile_expr(&args[0], false);
1640        let it = self.alloc_local("__in_it");
1641        self.emit(Op::StoreLocal(it));
1642
1643        // Dispatch: TestVariant pops; we Dup to keep the iter around.
1644        self.emit(Op::LoadLocal(it));
1645        self.emit(Op::Dup);
1646        let lazy_name = self.pool.variant("__IterLazy");
1647        self.emit(Op::TestVariant(lazy_name));
1648        let j_to_check_cursor = self.code.len();
1649        self.emit(Op::JumpIfNot(0));
1650
1651        // ── lazy path ────────────────────────────────────────────────
1652        // The Dup'd iter is on stack but we've consumed it via TestVariant,
1653        // so reload from the local.
1654        self.emit(Op::LoadLocal(it));
1655        self.emit(Op::GetVariantArg(0)); // seed
1656        let seed = self.alloc_local("__in_seed");
1657        self.emit(Op::StoreLocal(seed));
1658
1659        self.emit(Op::LoadLocal(it));
1660        self.emit(Op::GetVariantArg(1)); // step closure
1661        let step = self.alloc_local("__in_step");
1662        self.emit(Op::StoreLocal(step));
1663
1664        // Call step(seed) → Option[(T, S)].
1665        let nid_lazy = self.pool.node_id("n_iter_next_lazy");
1666        self.emit(Op::LoadLocal(step));
1667        self.emit(Op::LoadLocal(seed));
1668        self.emit(Op::CallClosure { arity: 1, node_id_idx: nid_lazy });
1669        let opt = self.alloc_local("__in_opt");
1670        self.emit(Op::StoreLocal(opt));
1671
1672        // If `step` returned None, propagate it directly.
1673        self.emit(Op::LoadLocal(opt));
1674        let some_name = self.pool.variant("Some");
1675        self.emit(Op::TestVariant(some_name));
1676        let j_lazy_none = self.code.len();
1677        self.emit(Op::JumpIfNot(0));
1678
1679        // Some((t, new_seed)) — extract the inner tuple, repackage as
1680        // Some((t, __IterLazy(new_seed, step))) so the next call advances.
1681        self.emit(Op::LoadLocal(opt));
1682        self.emit(Op::GetVariantArg(0));     // (t, new_seed)
1683        let pair = self.alloc_local("__in_pair");
1684        self.emit(Op::StoreLocal(pair));
1685
1686        self.emit(Op::LoadLocal(pair));
1687        self.emit(Op::GetElem(0));           // t
1688        self.emit(Op::LoadLocal(pair));
1689        self.emit(Op::GetElem(1));           // new_seed
1690        self.emit(Op::LoadLocal(step));      // step closure
1691        let lazy_v = self.pool.variant("__IterLazy");
1692        self.emit(Op::MakeVariant { name_idx: lazy_v, arity: 2 }); // __IterLazy(new_seed, step)
1693        self.emit(Op::MakeTuple(2));         // (t, new_iter)
1694        let some_v = self.pool.variant("Some");
1695        self.emit(Op::MakeVariant { name_idx: some_v, arity: 1 });
1696        let j_after_lazy = self.code.len();
1697        self.emit(Op::Jump(0));
1698
1699        // Lazy → None: just forward the None.
1700        let none_t = self.code.len() as i32;
1701        if let Op::JumpIfNot(off) = &mut self.code[j_lazy_none] {
1702            *off = none_t - (j_lazy_none as i32 + 1);
1703        }
1704        let none_v = self.pool.variant("None");
1705        self.emit(Op::MakeVariant { name_idx: none_v, arity: 0 });
1706        let j_after_lazy_none = self.code.len();
1707        self.emit(Op::Jump(0));
1708
1709        // ── cursor path (#379) ───────────────────────────────────────
1710        let cursor_check_t = self.code.len() as i32;
1711        if let Op::JumpIfNot(off) = &mut self.code[j_to_check_cursor] {
1712            *off = cursor_check_t - (j_to_check_cursor as i32 + 1);
1713        }
1714
1715        self.emit(Op::LoadLocal(it));
1716        self.emit(Op::Dup);
1717        let cursor_name = self.pool.variant("__IterCursor");
1718        self.emit(Op::TestVariant(cursor_name));
1719        let j_to_eager = self.code.len();
1720        self.emit(Op::JumpIfNot(0));
1721
1722        // Cursor path: extract handle, effect-call sql.cursor_next(handle).
1723        // The handler returns Option[T] directly. We then wrap as
1724        // Some((T, __IterCursor(handle))) or forward None.
1725        self.emit(Op::LoadLocal(it));
1726        self.emit(Op::GetVariantArg(0));     // handle
1727        let handle = self.alloc_local("__in_handle");
1728        self.emit(Op::StoreLocal(handle));
1729
1730        let kind_idx = self.pool.str("sql");
1731        let op_idx = self.pool.str("cursor_next");
1732        let nid_cursor = self.pool.node_id("n_iter_next_cursor");
1733        self.emit(Op::LoadLocal(handle));
1734        self.emit(Op::EffectCall {
1735            kind_idx,
1736            op_idx,
1737            arity: 1,
1738            node_id_idx: nid_cursor,
1739        });
1740        let cur_opt = self.alloc_local("__in_cur_opt");
1741        self.emit(Op::StoreLocal(cur_opt));
1742
1743        self.emit(Op::LoadLocal(cur_opt));
1744        let some_c = self.pool.variant("Some");
1745        self.emit(Op::TestVariant(some_c));
1746        let j_cursor_none = self.code.len();
1747        self.emit(Op::JumpIfNot(0));
1748
1749        // Some(row): build Some((row, __IterCursor(handle)))
1750        self.emit(Op::LoadLocal(cur_opt));
1751        self.emit(Op::GetVariantArg(0));     // row
1752        self.emit(Op::LoadLocal(handle));
1753        let cursor_v = self.pool.variant("__IterCursor");
1754        self.emit(Op::MakeVariant { name_idx: cursor_v, arity: 1 });
1755        self.emit(Op::MakeTuple(2));         // (row, __IterCursor(handle))
1756        let some_c2 = self.pool.variant("Some");
1757        self.emit(Op::MakeVariant { name_idx: some_c2, arity: 1 });
1758        let j_after_cursor = self.code.len();
1759        self.emit(Op::Jump(0));
1760
1761        // Cursor → None
1762        let cursor_none_t = self.code.len() as i32;
1763        if let Op::JumpIfNot(off) = &mut self.code[j_cursor_none] {
1764            *off = cursor_none_t - (j_cursor_none as i32 + 1);
1765        }
1766        let none_c = self.pool.variant("None");
1767        self.emit(Op::MakeVariant { name_idx: none_c, arity: 0 });
1768        let j_after_cursor_none = self.code.len();
1769        self.emit(Op::Jump(0));
1770
1771        // ── eager path ───────────────────────────────────────────────
1772        let eager_t = self.code.len() as i32;
1773        if let Op::JumpIfNot(off) = &mut self.code[j_to_eager] {
1774            *off = eager_t - (j_to_eager as i32 + 1);
1775        }
1776
1777        self.emit(Op::LoadLocal(it));
1778        self.emit(Op::GetVariantArg(0));
1779        let list = self.alloc_local("__in_list");
1780        self.emit(Op::StoreLocal(list));
1781
1782        self.emit(Op::LoadLocal(it));
1783        self.emit(Op::GetVariantArg(1));
1784        let idx = self.alloc_local("__in_idx");
1785        self.emit(Op::StoreLocal(idx));
1786
1787        // if idx < len(list)
1788        self.emit(Op::LoadLocal(idx));
1789        self.emit(Op::LoadLocal(list));
1790        self.emit(Op::GetListLen);
1791        self.emit(Op::IntLt);
1792        let j_eager_else = self.code.len();
1793        self.emit(Op::JumpIfNot(0));
1794
1795        // Some((item, __IterEager(list, idx+1)))
1796        self.emit(Op::LoadLocal(list));
1797        self.emit(Op::LoadLocal(idx));
1798        self.emit(Op::GetListElemDyn);
1799
1800        self.emit(Op::LoadLocal(list));
1801        self.emit(Op::LoadLocal(idx));
1802        let one = self.pool.int(1);
1803        self.emit(Op::PushConst(one));
1804        self.emit(Op::IntAdd);
1805        let eager_v = self.pool.variant("__IterEager");
1806        self.emit(Op::MakeVariant { name_idx: eager_v, arity: 2 });
1807        self.emit(Op::MakeTuple(2));
1808        let some_e = self.pool.variant("Some");
1809        self.emit(Op::MakeVariant { name_idx: some_e, arity: 1 });
1810        let j_after_eager = self.code.len();
1811        self.emit(Op::Jump(0));
1812
1813        // Eager → None
1814        let eager_none_t = self.code.len() as i32;
1815        if let Op::JumpIfNot(off) = &mut self.code[j_eager_else] {
1816            *off = eager_none_t - (j_eager_else as i32 + 1);
1817        }
1818        let none_e = self.pool.variant("None");
1819        self.emit(Op::MakeVariant { name_idx: none_e, arity: 0 });
1820
1821        // Converge all paths.
1822        let end = self.code.len() as i32;
1823        if let Op::Jump(off) = &mut self.code[j_after_lazy] {
1824            *off = end - (j_after_lazy as i32 + 1);
1825        }
1826        if let Op::Jump(off) = &mut self.code[j_after_lazy_none] {
1827            *off = end - (j_after_lazy_none as i32 + 1);
1828        }
1829        if let Op::Jump(off) = &mut self.code[j_after_cursor] {
1830            *off = end - (j_after_cursor as i32 + 1);
1831        }
1832        if let Op::Jump(off) = &mut self.code[j_after_cursor_none] {
1833            *off = end - (j_after_cursor_none as i32 + 1);
1834        }
1835        if let Op::Jump(off) = &mut self.code[j_after_eager] {
1836            *off = end - (j_after_eager as i32 + 1);
1837        }
1838    }
1839
1840    /// `iter.unfold(seed, step)` — lazy iterator that calls `step(seed)` on
1841    /// each `iter.next` and threads the new seed forward. Internal value
1842    /// shape: `__IterLazy(seed, step)`. Step has type `(S) -> Option[(T, S)]`;
1843    /// returning `None` ends the iteration (#376).
1844    fn emit_iter_unfold(&mut self, args: &[a::CExpr]) {
1845        self.compile_expr(&args[0], false); // seed
1846        self.compile_expr(&args[1], false); // step
1847        let lazy = self.pool.variant("__IterLazy");
1848        self.emit(Op::MakeVariant { name_idx: lazy, arity: 2 });
1849    }
1850
1851    /// `iter.is_empty(it)` — true iff no further element. v1 supports the
1852    /// eager form O(1); on a lazy iter the seed sits in slot 0 and is not a
1853    /// List, so the VM trips on `GetListLen` rather than returning a wrong
1854    /// answer. Callers needing lazy support should materialize with
1855    /// `iter.to_list` first or call `iter.next` and pattern-match.
1856    fn emit_iter_is_empty(&mut self, args: &[a::CExpr]) {
1857        self.compile_expr(&args[0], false);
1858        let it = self.alloc_local("__ie_it");
1859        self.emit(Op::StoreLocal(it));
1860
1861        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(1)); // idx
1862        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(0)); // list
1863        self.emit(Op::GetListLen);                                     // len
1864        self.emit(Op::IntLt);                                          // idx < len
1865        self.emit(Op::BoolNot);                                        // NOT(idx < len)
1866    }
1867
1868    /// `iter.count(it)` — number of remaining elements (v1: eager-only).
1869    fn emit_iter_count(&mut self, args: &[a::CExpr]) {
1870        self.compile_expr(&args[0], false);
1871        let it = self.alloc_local("__ic_it");
1872        self.emit(Op::StoreLocal(it));
1873
1874        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(0));
1875        self.emit(Op::GetListLen);                                     // len
1876        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(1)); // idx
1877        self.emit(Op::IntSub);                                         // len - idx
1878    }
1879
1880    /// `iter.take(it, n)` — collect up to n elements, return as new Iter.
1881    fn emit_iter_take(&mut self, args: &[a::CExpr]) {
1882        self.compile_expr(&args[0], false);
1883        let it   = self.alloc_local("__itk_it");
1884        self.emit(Op::StoreLocal(it));
1885
1886        self.compile_expr(&args[1], false);
1887        let n    = self.alloc_local("__itk_n");
1888        self.emit(Op::StoreLocal(n));
1889
1890        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(0));
1891        let list = self.alloc_local("__itk_list");
1892        self.emit(Op::StoreLocal(list));
1893
1894        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(1));
1895        let i    = self.alloc_local("__itk_i");
1896        self.emit(Op::StoreLocal(i));
1897
1898        self.emit(Op::MakeList(0));
1899        let out  = self.alloc_local("__itk_out");
1900        self.emit(Op::StoreLocal(out));
1901
1902        let zero = self.pool.int(0);
1903        self.emit(Op::PushConst(zero));
1904        let cnt  = self.alloc_local("__itk_cnt");
1905        self.emit(Op::StoreLocal(cnt));
1906
1907        let loop_top = self.code.len();
1908
1909        // while cnt < n
1910        self.emit(Op::LoadLocal(cnt));
1911        self.emit(Op::LoadLocal(n));
1912        self.emit(Op::IntLt);
1913        let j_exit_n = self.code.len();
1914        self.emit(Op::JumpIfNot(0));
1915
1916        // AND i < len(list)
1917        self.emit(Op::LoadLocal(i));
1918        self.emit(Op::LoadLocal(list)); self.emit(Op::GetListLen);
1919        self.emit(Op::IntLt);
1920        let j_exit_l = self.code.len();
1921        self.emit(Op::JumpIfNot(0));
1922
1923        // out = out ++ [list[i]]
1924        self.emit(Op::LoadLocal(out));
1925        self.emit(Op::LoadLocal(list));
1926        self.emit(Op::LoadLocal(i));
1927        self.emit(Op::GetListElemDyn);
1928        self.emit(Op::ListAppend);
1929        self.emit(Op::StoreLocal(out));
1930
1931        let one = self.pool.int(1);
1932        // i = i + 1
1933        self.emit(Op::LoadLocal(i));
1934        self.emit(Op::PushConst(one));
1935        self.emit(Op::IntAdd);
1936        self.emit(Op::StoreLocal(i));
1937        // cnt = cnt + 1
1938        self.emit(Op::LoadLocal(cnt));
1939        self.emit(Op::PushConst(one));
1940        self.emit(Op::IntAdd);
1941        self.emit(Op::StoreLocal(cnt));
1942
1943        let jback = self.code.len();
1944        self.emit(Op::Jump((loop_top as i32) - (jback as i32 + 1)));
1945
1946        let exit_t = self.code.len() as i32;
1947        if let Op::JumpIfNot(off) = &mut self.code[j_exit_n] { *off = exit_t - (j_exit_n as i32 + 1); }
1948        if let Op::JumpIfNot(off) = &mut self.code[j_exit_l] { *off = exit_t - (j_exit_l as i32 + 1); }
1949
1950        // return new __IterEager(out, 0)
1951        self.emit(Op::LoadLocal(out));
1952        self.emit(Op::PushConst(zero));
1953        let eager_v = self.pool.variant("__IterEager");
1954        self.emit(Op::MakeVariant { name_idx: eager_v, arity: 2 });
1955    }
1956
1957    /// `iter.skip(it, n)` — advance cursor by n (or to end), return new Iter.
1958    fn emit_iter_skip(&mut self, args: &[a::CExpr]) {
1959        self.compile_expr(&args[0], false);
1960        let it   = self.alloc_local("__isk_it");
1961        self.emit(Op::StoreLocal(it));
1962
1963        self.compile_expr(&args[1], false);
1964        let n    = self.alloc_local("__isk_n");
1965        self.emit(Op::StoreLocal(n));
1966
1967        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(0));
1968        let list = self.alloc_local("__isk_list");
1969        self.emit(Op::StoreLocal(list));
1970
1971        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(1));
1972        let idx  = self.alloc_local("__isk_idx");
1973        self.emit(Op::StoreLocal(idx));
1974
1975        // raw = idx + n
1976        self.emit(Op::LoadLocal(idx));
1977        self.emit(Op::LoadLocal(n));
1978        self.emit(Op::IntAdd);
1979        let raw  = self.alloc_local("__isk_raw");
1980        self.emit(Op::StoreLocal(raw));
1981
1982        // new_idx = if raw < len then raw else len
1983        self.emit(Op::LoadLocal(raw));
1984        self.emit(Op::LoadLocal(list)); self.emit(Op::GetListLen);
1985        self.emit(Op::IntLt);
1986        let j_use_raw = self.code.len();
1987        self.emit(Op::JumpIf(0));
1988
1989        // use len
1990        self.emit(Op::LoadLocal(list)); self.emit(Op::GetListLen);
1991        let j_end = self.code.len();
1992        self.emit(Op::Jump(0));
1993
1994        // use raw
1995        let raw_t = self.code.len() as i32;
1996        if let Op::JumpIf(off) = &mut self.code[j_use_raw] { *off = raw_t - (j_use_raw as i32 + 1); }
1997        self.emit(Op::LoadLocal(raw));
1998
1999        let end_t = self.code.len() as i32;
2000        if let Op::Jump(off) = &mut self.code[j_end] { *off = end_t - (j_end as i32 + 1); }
2001
2002        // new_idx on stack; build new __IterEager(list, new_idx)
2003        let new_idx = self.alloc_local("__isk_ni");
2004        self.emit(Op::StoreLocal(new_idx));
2005        self.emit(Op::LoadLocal(list));
2006        self.emit(Op::LoadLocal(new_idx));
2007        let eager_v = self.pool.variant("__IterEager");
2008        self.emit(Op::MakeVariant { name_idx: eager_v, arity: 2 });
2009    }
2010
2011    /// `iter.to_list(it)` — materialise remaining elements into a List.
2012    ///
2013    /// Dispatches on the iter variant (#376):
2014    /// - `__IterLazy`: repeatedly call `step(seed)`; on `Some((t, s'))` append
2015    ///   `t` and continue with `s'`; on `None` stop. May hang on truly
2016    ///   infinite producers — that's documented as a v1 limitation, the
2017    ///   step-limit-protected caller is what catches misuse.
2018    /// - `__IterEager`: slice the backing list from `idx` onward (O(n) walk).
2019    fn emit_iter_to_list(&mut self, args: &[a::CExpr]) {
2020        self.compile_expr(&args[0], false);
2021        let it = self.alloc_local("__itl_it");
2022        self.emit(Op::StoreLocal(it));
2023
2024        // Build the output list up-front, shared across both paths.
2025        self.emit(Op::MakeList(0));
2026        let out = self.alloc_local("__itl_out");
2027        self.emit(Op::StoreLocal(out));
2028
2029        // Dispatch on variant tag.
2030        self.emit(Op::LoadLocal(it));
2031        let lazy_name = self.pool.variant("__IterLazy");
2032        self.emit(Op::TestVariant(lazy_name));
2033        let j_to_eager = self.code.len();
2034        self.emit(Op::JumpIfNot(0));
2035
2036        // ── lazy path ─────────────────────────────────────────────────
2037        // seed and step closure live in locals; we update seed each iteration.
2038        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(0));
2039        let seed = self.alloc_local("__itl_seed");
2040        self.emit(Op::StoreLocal(seed));
2041
2042        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(1));
2043        let step = self.alloc_local("__itl_step");
2044        self.emit(Op::StoreLocal(step));
2045
2046        let lazy_loop = self.code.len();
2047        let nid_lazy = self.pool.node_id("n_iter_to_list_lazy");
2048        self.emit(Op::LoadLocal(step));
2049        self.emit(Op::LoadLocal(seed));
2050        self.emit(Op::CallClosure { arity: 1, node_id_idx: nid_lazy });
2051        let opt = self.alloc_local("__itl_opt");
2052        self.emit(Op::StoreLocal(opt));
2053
2054        // If None, drop out of the lazy loop.
2055        self.emit(Op::LoadLocal(opt));
2056        let some_name = self.pool.variant("Some");
2057        self.emit(Op::TestVariant(some_name));
2058        let j_lazy_exit = self.code.len();
2059        self.emit(Op::JumpIfNot(0));
2060
2061        // Some((t, new_seed)): append t to out, replace seed.
2062        self.emit(Op::LoadLocal(opt));
2063        self.emit(Op::GetVariantArg(0));
2064        let pair = self.alloc_local("__itl_pair");
2065        self.emit(Op::StoreLocal(pair));
2066
2067        self.emit(Op::LoadLocal(out));
2068        self.emit(Op::LoadLocal(pair)); self.emit(Op::GetElem(0));
2069        self.emit(Op::ListAppend);
2070        self.emit(Op::StoreLocal(out));
2071
2072        self.emit(Op::LoadLocal(pair)); self.emit(Op::GetElem(1));
2073        self.emit(Op::StoreLocal(seed));
2074
2075        let jback_lazy = self.code.len();
2076        self.emit(Op::Jump((lazy_loop as i32) - (jback_lazy as i32 + 1)));
2077
2078        let lazy_exit_t = self.code.len() as i32;
2079        if let Op::JumpIfNot(off) = &mut self.code[j_lazy_exit] {
2080            *off = lazy_exit_t - (j_lazy_exit as i32 + 1);
2081        }
2082        let j_after_lazy = self.code.len();
2083        self.emit(Op::Jump(0));
2084
2085        // ── eager path ────────────────────────────────────────────────
2086        let eager_t = self.code.len() as i32;
2087        if let Op::JumpIfNot(off) = &mut self.code[j_to_eager] {
2088            *off = eager_t - (j_to_eager as i32 + 1);
2089        }
2090
2091        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(0));
2092        let list = self.alloc_local("__itl_list");
2093        self.emit(Op::StoreLocal(list));
2094
2095        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(1));
2096        let i = self.alloc_local("__itl_i");
2097        self.emit(Op::StoreLocal(i));
2098
2099        let loop_top = self.code.len();
2100        self.emit(Op::LoadLocal(i));
2101        self.emit(Op::LoadLocal(list)); self.emit(Op::GetListLen);
2102        self.emit(Op::IntLt);
2103        let j_exit = self.code.len();
2104        self.emit(Op::JumpIfNot(0));
2105
2106        self.emit(Op::LoadLocal(out));
2107        self.emit(Op::LoadLocal(list));
2108        self.emit(Op::LoadLocal(i));
2109        self.emit(Op::GetListElemDyn);
2110        self.emit(Op::ListAppend);
2111        self.emit(Op::StoreLocal(out));
2112
2113        self.emit(Op::LoadLocal(i));
2114        let one = self.pool.int(1);
2115        self.emit(Op::PushConst(one));
2116        self.emit(Op::IntAdd);
2117        self.emit(Op::StoreLocal(i));
2118
2119        let jback = self.code.len();
2120        self.emit(Op::Jump((loop_top as i32) - (jback as i32 + 1)));
2121
2122        let exit_t = self.code.len() as i32;
2123        if let Op::JumpIfNot(off) = &mut self.code[j_exit] {
2124            *off = exit_t - (j_exit as i32 + 1);
2125        }
2126
2127        // Converge: lazy path falls through here too.
2128        let converge = self.code.len() as i32;
2129        if let Op::Jump(off) = &mut self.code[j_after_lazy] {
2130            *off = converge - (j_after_lazy as i32 + 1);
2131        }
2132        self.emit(Op::LoadLocal(out));
2133    }
2134
2135    /// `iter.map(it, f)` — apply `f` to each remaining element; returns new Iter.
2136    fn emit_iter_map(&mut self, args: &[a::CExpr]) {
2137        self.compile_expr(&args[0], false);
2138        let it   = self.alloc_local("__im_it");
2139        self.emit(Op::StoreLocal(it));
2140
2141        self.compile_expr(&args[1], false);
2142        let f    = self.alloc_local("__im_f");
2143        self.emit(Op::StoreLocal(f));
2144
2145        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(0));
2146        let list = self.alloc_local("__im_list");
2147        self.emit(Op::StoreLocal(list));
2148
2149        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(1));
2150        let i    = self.alloc_local("__im_i");
2151        self.emit(Op::StoreLocal(i));
2152
2153        self.emit(Op::MakeList(0));
2154        let out  = self.alloc_local("__im_out");
2155        self.emit(Op::StoreLocal(out));
2156
2157        let loop_top = self.code.len();
2158        self.emit(Op::LoadLocal(i));
2159        self.emit(Op::LoadLocal(list)); self.emit(Op::GetListLen);
2160        self.emit(Op::IntLt);
2161        let j_exit = self.code.len();
2162        self.emit(Op::JumpIfNot(0));
2163
2164        let nid = self.pool.node_id("n_iter_map");
2165        self.emit(Op::LoadLocal(out));
2166        self.emit(Op::LoadLocal(f));
2167        self.emit(Op::LoadLocal(list));
2168        self.emit(Op::LoadLocal(i));
2169        self.emit(Op::GetListElemDyn);
2170        self.emit(Op::CallClosure { arity: 1, node_id_idx: nid });
2171        self.emit(Op::ListAppend);
2172        self.emit(Op::StoreLocal(out));
2173
2174        self.emit(Op::LoadLocal(i));
2175        let one = self.pool.int(1);
2176        self.emit(Op::PushConst(one));
2177        self.emit(Op::IntAdd);
2178        self.emit(Op::StoreLocal(i));
2179
2180        let jback = self.code.len();
2181        self.emit(Op::Jump((loop_top as i32) - (jback as i32 + 1)));
2182
2183        let exit_t = self.code.len() as i32;
2184        if let Op::JumpIfNot(off) = &mut self.code[j_exit] { *off = exit_t - (j_exit as i32 + 1); }
2185
2186        let zero = self.pool.int(0);
2187        self.emit(Op::LoadLocal(out));
2188        self.emit(Op::PushConst(zero));
2189        let eager_v = self.pool.variant("__IterEager");
2190        self.emit(Op::MakeVariant { name_idx: eager_v, arity: 2 });
2191    }
2192
2193    /// `iter.filter(it, pred)` — keep elements where pred is true; returns new Iter.
2194    fn emit_iter_filter(&mut self, args: &[a::CExpr]) {
2195        self.compile_expr(&args[0], false);
2196        let it   = self.alloc_local("__if_it");
2197        self.emit(Op::StoreLocal(it));
2198
2199        self.compile_expr(&args[1], false);
2200        let f    = self.alloc_local("__if_f");
2201        self.emit(Op::StoreLocal(f));
2202
2203        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(0));
2204        let list = self.alloc_local("__if_list");
2205        self.emit(Op::StoreLocal(list));
2206
2207        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(1));
2208        let i    = self.alloc_local("__if_i");
2209        self.emit(Op::StoreLocal(i));
2210
2211        self.emit(Op::MakeList(0));
2212        let out  = self.alloc_local("__if_out");
2213        self.emit(Op::StoreLocal(out));
2214
2215        let loop_top = self.code.len();
2216        self.emit(Op::LoadLocal(i));
2217        self.emit(Op::LoadLocal(list)); self.emit(Op::GetListLen);
2218        self.emit(Op::IntLt);
2219        let j_exit = self.code.len();
2220        self.emit(Op::JumpIfNot(0));
2221
2222        // elem := list[i]
2223        self.emit(Op::LoadLocal(list));
2224        self.emit(Op::LoadLocal(i));
2225        self.emit(Op::GetListElemDyn);
2226        let x    = self.alloc_local("__if_x");
2227        self.emit(Op::StoreLocal(x));
2228
2229        let nid = self.pool.node_id("n_iter_filter");
2230        self.emit(Op::LoadLocal(f));
2231        self.emit(Op::LoadLocal(x));
2232        self.emit(Op::CallClosure { arity: 1, node_id_idx: nid });
2233        let j_skip = self.code.len();
2234        self.emit(Op::JumpIfNot(0));
2235
2236        self.emit(Op::LoadLocal(out));
2237        self.emit(Op::LoadLocal(x));
2238        self.emit(Op::ListAppend);
2239        self.emit(Op::StoreLocal(out));
2240
2241        let skip_t = self.code.len() as i32;
2242        if let Op::JumpIfNot(off) = &mut self.code[j_skip] { *off = skip_t - (j_skip as i32 + 1); }
2243
2244        self.emit(Op::LoadLocal(i));
2245        let one = self.pool.int(1);
2246        self.emit(Op::PushConst(one));
2247        self.emit(Op::IntAdd);
2248        self.emit(Op::StoreLocal(i));
2249
2250        let jback = self.code.len();
2251        self.emit(Op::Jump((loop_top as i32) - (jback as i32 + 1)));
2252
2253        let exit_t = self.code.len() as i32;
2254        if let Op::JumpIfNot(off) = &mut self.code[j_exit] { *off = exit_t - (j_exit as i32 + 1); }
2255
2256        let zero = self.pool.int(0);
2257        self.emit(Op::LoadLocal(out));
2258        self.emit(Op::PushConst(zero));
2259        let eager_v = self.pool.variant("__IterEager");
2260        self.emit(Op::MakeVariant { name_idx: eager_v, arity: 2 });
2261    }
2262
2263    /// `iter.fold(it, init, f)` — left fold over remaining elements.
2264    fn emit_iter_fold(&mut self, args: &[a::CExpr]) {
2265        self.compile_expr(&args[0], false);
2266        let it   = self.alloc_local("__ifo_it");
2267        self.emit(Op::StoreLocal(it));
2268
2269        self.compile_expr(&args[1], false);
2270        let acc  = self.alloc_local("__ifo_acc");
2271        self.emit(Op::StoreLocal(acc));
2272
2273        self.compile_expr(&args[2], false);
2274        let f    = self.alloc_local("__ifo_f");
2275        self.emit(Op::StoreLocal(f));
2276
2277        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(0));
2278        let list = self.alloc_local("__ifo_list");
2279        self.emit(Op::StoreLocal(list));
2280
2281        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(1));
2282        let i    = self.alloc_local("__ifo_i");
2283        self.emit(Op::StoreLocal(i));
2284
2285        let loop_top = self.code.len();
2286        self.emit(Op::LoadLocal(i));
2287        self.emit(Op::LoadLocal(list)); self.emit(Op::GetListLen);
2288        self.emit(Op::IntLt);
2289        let j_exit = self.code.len();
2290        self.emit(Op::JumpIfNot(0));
2291
2292        let nid = self.pool.node_id("n_iter_fold");
2293        self.emit(Op::LoadLocal(f));
2294        self.emit(Op::LoadLocal(acc));
2295        self.emit(Op::LoadLocal(list));
2296        self.emit(Op::LoadLocal(i));
2297        self.emit(Op::GetListElemDyn);
2298        self.emit(Op::CallClosure { arity: 2, node_id_idx: nid });
2299        self.emit(Op::StoreLocal(acc));
2300
2301        self.emit(Op::LoadLocal(i));
2302        let one = self.pool.int(1);
2303        self.emit(Op::PushConst(one));
2304        self.emit(Op::IntAdd);
2305        self.emit(Op::StoreLocal(i));
2306
2307        let jback = self.code.len();
2308        self.emit(Op::Jump((loop_top as i32) - (jback as i32 + 1)));
2309
2310        let exit_t = self.code.len() as i32;
2311        if let Op::JumpIfNot(off) = &mut self.code[j_exit] { *off = exit_t - (j_exit as i32 + 1); }
2312        self.emit(Op::LoadLocal(acc));
2313    }
2314
2315    /// `map.fold(m, init, f)` — left fold over `Map[K, V]` entries with a
2316    /// three-arg combiner `f(acc, k, v)`. Iteration order matches
2317    /// `map.entries` (BTreeMap-sorted by key). Materializes the entry
2318    /// list once via the runtime's `("map", "entries")` op, then runs
2319    /// the same inline loop as `list.fold`.
2320    fn emit_map_fold(&mut self, args: &[a::CExpr], node_id_idx: u32) {
2321        // xs := map.entries(m)
2322        self.compile_expr(&args[0], false);
2323        let map_kind = self.pool.str("map");
2324        let entries_op = self.pool.str("entries");
2325        self.emit(Op::EffectCall {
2326            kind_idx: map_kind,
2327            op_idx: entries_op,
2328            arity: 1,
2329            node_id_idx,
2330        });
2331        let xs = self.alloc_local("__mf_xs");
2332        self.emit(Op::StoreLocal(xs));
2333
2334        // acc := init
2335        self.compile_expr(&args[1], false);
2336        let acc = self.alloc_local("__mf_acc");
2337        self.emit(Op::StoreLocal(acc));
2338
2339        // f := <closure>
2340        self.compile_expr(&args[2], false);
2341        let f = self.alloc_local("__mf_f");
2342        self.emit(Op::StoreLocal(f));
2343
2344        // i := 0
2345        let zero = self.pool.int(0);
2346        self.emit(Op::PushConst(zero));
2347        let i = self.alloc_local("__mf_i");
2348        self.emit(Op::StoreLocal(i));
2349
2350        // loop_top: while i < len(xs)
2351        let loop_top = self.code.len();
2352        self.emit(Op::LoadLocal(i));
2353        self.emit(Op::LoadLocal(xs));
2354        self.emit(Op::GetListLen);
2355        self.emit(Op::IntLt);
2356        let j_exit = self.code.len();
2357        self.emit(Op::JumpIfNot(0));
2358
2359        // pair := xs[i]
2360        self.emit(Op::LoadLocal(xs));
2361        self.emit(Op::LoadLocal(i));
2362        self.emit(Op::GetListElemDyn);
2363        let pair = self.alloc_local("__mf_pair");
2364        self.emit(Op::StoreLocal(pair));
2365
2366        // acc := f(acc, pair.0, pair.1)
2367        let nid = self.pool.node_id("n_map_fold");
2368        self.emit(Op::LoadLocal(f));
2369        self.emit(Op::LoadLocal(acc));
2370        self.emit(Op::LoadLocal(pair));
2371        self.emit(Op::GetElem(0));
2372        self.emit(Op::LoadLocal(pair));
2373        self.emit(Op::GetElem(1));
2374        self.emit(Op::CallClosure { arity: 3, node_id_idx: nid });
2375        self.emit(Op::StoreLocal(acc));
2376
2377        // i := i + 1
2378        self.emit(Op::LoadLocal(i));
2379        let one = self.pool.int(1);
2380        self.emit(Op::PushConst(one));
2381        self.emit(Op::IntAdd);
2382        self.emit(Op::StoreLocal(i));
2383
2384        let jump_back = self.code.len();
2385        let back = (loop_top as i32) - (jump_back as i32 + 1);
2386        self.emit(Op::Jump(back));
2387
2388        let exit_target = self.code.len() as i32;
2389        if let Op::JumpIfNot(off) = &mut self.code[j_exit] {
2390            *off = exit_target - (j_exit as i32 + 1);
2391        }
2392        self.emit(Op::LoadLocal(acc));
2393    }
2394
2395    /// Inline pattern: `<module>.map(v, f)` and friends.
2396    /// `wrap_with`: variant tag whose payload triggers the call (Ok / Some / Err).
2397    /// `wrap_result`: if true, wrap the closure's result back in `wrap_with`
2398    /// (map shape); if false, expect the closure to return a wrapped value
2399    /// itself (and_then shape).
2400    fn emit_variant_map(
2401        &mut self,
2402        args: &[a::CExpr],
2403        wrap_with: &str,
2404        wrap_result: bool,
2405    ) {
2406        // args[0] = the wrapped value (Result/Option), args[1] = closure
2407        let wrap_idx = self.pool.variant(wrap_with);
2408
2409        // Compile and store the value into a local, evaluate closure on top of stack.
2410        self.compile_expr(&args[0], false);
2411        let val_slot = self.alloc_local("__hov");
2412        self.emit(Op::StoreLocal(val_slot));
2413
2414        self.compile_expr(&args[1], false);
2415        let f_slot = self.alloc_local("__hof");
2416        self.emit(Op::StoreLocal(f_slot));
2417
2418        // Stack discipline:
2419        //   load val ⇒ [v]
2420        //   dup     ⇒ [v, v]
2421        //   test    ⇒ [v, Bool]
2422        //   jumpifnot ⇒ [v]
2423        // Both branches end with [v] before the branch body.
2424        self.emit(Op::LoadLocal(val_slot));
2425        self.emit(Op::Dup);
2426        self.emit(Op::TestVariant(wrap_idx));
2427        let j_skip = self.code.len();
2428        self.emit(Op::JumpIfNot(0));
2429
2430        // Matched arm: extract payload, call closure on it.
2431        self.emit(Op::GetVariantArg(0));
2432        let arg_slot = self.alloc_local("__hov_arg");
2433        self.emit(Op::StoreLocal(arg_slot));
2434        self.emit(Op::LoadLocal(f_slot));
2435        self.emit(Op::LoadLocal(arg_slot));
2436        let nid = self.pool.node_id("n_hov");
2437        self.emit(Op::CallClosure { arity: 1, node_id_idx: nid });
2438        if wrap_result {
2439            self.emit(Op::MakeVariant { name_idx: wrap_idx, arity: 1 });
2440        }
2441        let j_end = self.code.len();
2442        self.emit(Op::Jump(0));
2443
2444        // Skip arm: stack already has [v] from the failed Dup; nothing to do.
2445        let skip_target = self.code.len() as i32;
2446        if let Op::JumpIfNot(off) = &mut self.code[j_skip] {
2447            *off = skip_target - (j_skip as i32 + 1);
2448        }
2449
2450        let end_target = self.code.len() as i32;
2451        if let Op::Jump(off) = &mut self.code[j_end] {
2452            *off = end_target - (j_end as i32 + 1);
2453        }
2454    }
2455
2456    /// Sibling of `emit_variant_map` for the recovery combinators
2457    /// `result.or_else` and `option.or_else`. Differences from
2458    /// `emit_variant_map`:
2459    ///   - matches on the *negative* variant (`Err` / `None`)
2460    ///   - the closure's result becomes the call's result directly,
2461    ///     with no wrapping (it is itself a `Result` / `Option`)
2462    ///   - `option.or_else`'s closure takes zero args (`None` has no
2463    ///     payload to forward)
2464    fn emit_variant_or_else(
2465        &mut self,
2466        args: &[a::CExpr],
2467        match_on: &str,
2468        closure_arity: u16,
2469    ) {
2470        let match_idx = self.pool.variant(match_on);
2471
2472        self.compile_expr(&args[0], false);
2473        let val_slot = self.alloc_local("__hoe");
2474        self.emit(Op::StoreLocal(val_slot));
2475
2476        self.compile_expr(&args[1], false);
2477        let f_slot = self.alloc_local("__hoe_f");
2478        self.emit(Op::StoreLocal(f_slot));
2479
2480        // Stack discipline mirrors emit_variant_map:
2481        //   load val      ⇒ [v]
2482        //   dup           ⇒ [v, v]
2483        //   test          ⇒ [v, Bool]
2484        //   jumpifnot     ⇒ [v]
2485        // The unmatched arm leaves [v] (Ok/Some unchanged); the
2486        // matched arm pops [v] and pushes the closure's result.
2487        self.emit(Op::LoadLocal(val_slot));
2488        self.emit(Op::Dup);
2489        self.emit(Op::TestVariant(match_idx));
2490        let j_skip = self.code.len();
2491        self.emit(Op::JumpIfNot(0));
2492
2493        // Matched arm: pop the duplicate left on the stack,
2494        // then call the closure with whatever payload it expects.
2495        self.emit(Op::Pop);
2496        self.emit(Op::LoadLocal(f_slot));
2497        if closure_arity == 1 {
2498            self.emit(Op::LoadLocal(val_slot));
2499            self.emit(Op::GetVariantArg(0));
2500        }
2501        let nid = self.pool.node_id("n_hoe");
2502        self.emit(Op::CallClosure { arity: closure_arity, node_id_idx: nid });
2503
2504        let j_end = self.code.len();
2505        self.emit(Op::Jump(0));
2506
2507        // Unmatched arm: stack already holds [v]; nothing to do.
2508        let skip_target = self.code.len() as i32;
2509        if let Op::JumpIfNot(off) = &mut self.code[j_skip] {
2510            *off = skip_target - (j_skip as i32 + 1);
2511        }
2512
2513        let end_target = self.code.len() as i32;
2514        if let Op::Jump(off) = &mut self.code[j_end] {
2515            *off = end_target - (j_end as i32 + 1);
2516        }
2517    }
2518
2519    /// `option.unwrap_or_else(opt, f)` — lazy default via zero-arg thunk.
2520    ///   Some(x) → x          (unwrap; no wrapping)
2521    ///   None    → f()        (call thunk; return its result directly)
2522    fn emit_option_unwrap_or_else(&mut self, args: &[a::CExpr]) {
2523        let some_idx = self.pool.variant("Some");
2524
2525        // Compile opt and f; stash both so they're accessible on both arms.
2526        self.compile_expr(&args[0], false);
2527        let val_slot = self.alloc_local("__uoe_val");
2528        self.emit(Op::StoreLocal(val_slot));
2529
2530        self.compile_expr(&args[1], false);
2531        let f_slot = self.alloc_local("__uoe_f");
2532        self.emit(Op::StoreLocal(f_slot));
2533
2534        // Test whether opt is Some.
2535        //   load val ⇒ [v]
2536        //   dup      ⇒ [v, v]
2537        //   test     ⇒ [v, Bool]
2538        //   jumpifnot → None arm
2539        self.emit(Op::LoadLocal(val_slot));
2540        self.emit(Op::Dup);
2541        self.emit(Op::TestVariant(some_idx));
2542        let j_none = self.code.len();
2543        self.emit(Op::JumpIfNot(0));
2544
2545        // Some arm: extract the payload from [v] left on the stack.
2546        self.emit(Op::GetVariantArg(0));
2547        let j_end = self.code.len();
2548        self.emit(Op::Jump(0));
2549
2550        // None arm: pop the [v] duplicate, call the thunk.
2551        let none_target = self.code.len() as i32;
2552        if let Op::JumpIfNot(off) = &mut self.code[j_none] {
2553            *off = none_target - (j_none as i32 + 1);
2554        }
2555        self.emit(Op::Pop);
2556        self.emit(Op::LoadLocal(f_slot));
2557        let nid = self.pool.node_id("n_uoe");
2558        self.emit(Op::CallClosure { arity: 0, node_id_idx: nid });
2559
2560        // Patch jump-to-end from Some arm.
2561        let end_target = self.code.len() as i32;
2562        if let Op::Jump(off) = &mut self.code[j_end] {
2563            *off = end_target - (j_end as i32 + 1);
2564        }
2565    }
2566
2567    /// `result.unwrap_or_else(res, f)` — lazy fallback over the Err payload.
2568    ///   Ok(x)  → x        (unwrap; no wrapping)
2569    ///   Err(e) → f(e)     (call closure with the error; result returned directly)
2570    /// Sibling of `emit_option_unwrap_or_else`; differs only in matching on
2571    /// `Ok` and forwarding the `Err` payload to a one-arg closure. (#679)
2572    fn emit_result_unwrap_or_else(&mut self, args: &[a::CExpr]) {
2573        let ok_idx = self.pool.variant("Ok");
2574
2575        self.compile_expr(&args[0], false);
2576        let val_slot = self.alloc_local("__ruoe_val");
2577        self.emit(Op::StoreLocal(val_slot));
2578
2579        self.compile_expr(&args[1], false);
2580        let f_slot = self.alloc_local("__ruoe_f");
2581        self.emit(Op::StoreLocal(f_slot));
2582
2583        // Test whether res is Ok.
2584        //   load val ⇒ [v]
2585        //   dup      ⇒ [v, v]
2586        //   test     ⇒ [v, Bool]
2587        //   jumpifnot → Err arm
2588        self.emit(Op::LoadLocal(val_slot));
2589        self.emit(Op::Dup);
2590        self.emit(Op::TestVariant(ok_idx));
2591        let j_err = self.code.len();
2592        self.emit(Op::JumpIfNot(0));
2593
2594        // Ok arm: extract the payload from [v] left on the stack.
2595        self.emit(Op::GetVariantArg(0));
2596        let j_end = self.code.len();
2597        self.emit(Op::Jump(0));
2598
2599        // Err arm: pop the [v] duplicate, call f with the Err payload.
2600        let err_target = self.code.len() as i32;
2601        if let Op::JumpIfNot(off) = &mut self.code[j_err] {
2602            *off = err_target - (j_err as i32 + 1);
2603        }
2604        self.emit(Op::Pop);
2605        self.emit(Op::LoadLocal(f_slot));
2606        self.emit(Op::LoadLocal(val_slot));
2607        self.emit(Op::GetVariantArg(0));
2608        let nid = self.pool.node_id("n_ruoe");
2609        self.emit(Op::CallClosure { arity: 1, node_id_idx: nid });
2610
2611        // Patch jump-to-end from Ok arm.
2612        let end_target = self.code.len() as i32;
2613        if let Op::Jump(off) = &mut self.code[j_end] {
2614            *off = end_target - (j_end as i32 + 1);
2615        }
2616    }
2617
2618    // ---- std.flow trampolines ----------------------------------------
2619    //
2620    // Each flow.<op>(c1, c2, ...) call site:
2621    //   1. compiles its closure args and leaves them on the stack
2622    //   2. registers a fresh "trampoline" Function whose body invokes
2623    //      those captured closures appropriately
2624    //   3. emits MakeClosure { fn_id: trampoline, capture_count: N }
2625    //
2626    // The trampoline's parameter layout is [capture_0, ..., capture_{N-1},
2627    // arg_0, ...]: captures first, the closure's own args after.
2628
2629    /// Allocate a fresh fn_id for a trampoline and install its bytecode.
2630    /// Trampolines are the one Function-creation path that already has
2631    /// the body in hand at install time (top-level fns and lambdas have
2632    /// it filled in later), so we compute `body_hash` immediately. The
2633    /// final hash pass at the end of `compile_program` is a no-op here.
2634    fn install_trampoline(&mut self, name: &str, arity: u16, locals_count: u16, code: Vec<Op>) -> u32 {
2635        let fn_id = self.next_fn_id.len() as u32;
2636        let body_hash = crate::program::compute_body_hash(
2637            arity, locals_count, &code, &self.pool.record_shapes);
2638        self.next_fn_id.push(Function {
2639            name: name.into(),
2640            arity,
2641            locals_count,
2642            code,
2643            effects: Vec::new(),
2644            body_hash,
2645            // Trampolines (flow.sequential / parallel / etc.) don't
2646            // surface refined params at this layer.
2647            refinements: Vec::new(),
2648            // Trampolines never emit `Op::GetField` — they're pure
2649            // scaffolding. Leaving this at 0 means the VM allocates
2650            // an empty IC slot.
2651            field_ic_sites: 0,
2652        });
2653        fn_id
2654    }
2655
2656    /// `flow.sequential(f, g)` returns a closure `(x) -> g(f(x))`.
2657    fn emit_flow_sequential(&mut self, args: &[a::CExpr]) {
2658        // Push f, g; build the trampoline closure with 2 captures.
2659        self.compile_expr(&args[0], false);
2660        self.compile_expr(&args[1], false);
2661        let nid = self.pool.node_id("n_flow_sequential");
2662        let code = vec![
2663            // Locals: [f=0, g=1, x=2]
2664            Op::LoadLocal(0),                                  // push f
2665            Op::LoadLocal(2),                                  // push x
2666            Op::CallClosure { arity: 1, node_id_idx: nid },    // r = f(x)
2667            // stack: [r]
2668            Op::StoreLocal(3),                                 // tmp = r
2669            Op::LoadLocal(1),                                  // push g
2670            Op::LoadLocal(3),                                  // push tmp
2671            Op::CallClosure { arity: 1, node_id_idx: nid },    // r = g(tmp)
2672            Op::Return,
2673        ];
2674        let fn_id = self.install_trampoline("__flow_sequential", 3, 4, code);
2675        self.emit(Op::MakeClosure { fn_id, capture_count: 2 });
2676    }
2677
2678    /// `flow.parallel(fa, fb)` returns a closure `() -> (fa(), fb())`.
2679    /// Implementation is sequential: each function is called in order
2680    /// and the results are packed into a 2-tuple. The spec (§11.2)
2681    /// allows the runtime to apply true parallelism here; that needs
2682    /// a thread-safe handler split and is left to a follow-up. The
2683    /// signature is what users program against — sequential vs threaded
2684    /// is an implementation detail invisible to the type system.
2685    fn emit_flow_parallel(&mut self, args: &[a::CExpr]) {
2686        // Push fa, fb; build a 0-arg trampoline closure with 2 captures.
2687        self.compile_expr(&args[0], false);
2688        self.compile_expr(&args[1], false);
2689        let nid = self.pool.node_id("n_flow_parallel");
2690        let code = vec![
2691            // Locals: [fa=0, fb=1]
2692            Op::LoadLocal(0),                                  // push fa
2693            Op::CallClosure { arity: 0, node_id_idx: nid },    // a = fa()
2694            Op::LoadLocal(1),                                  // push fb
2695            Op::CallClosure { arity: 0, node_id_idx: nid },    // b = fb()
2696            Op::MakeTuple(2),                                  // (a, b)
2697            Op::Return,
2698        ];
2699        let fn_id = self.install_trampoline("__flow_parallel", 2, 2, code);
2700        self.emit(Op::MakeClosure { fn_id, capture_count: 2 });
2701    }
2702
2703    /// `flow.parallel_list(actions)` runs each 0-arg closure in `actions`
2704    /// and returns the results as a list in input order. Variadic
2705    /// counterpart to `flow.parallel`. Sequential under the hood — the
2706    /// spec (§11.2) reserves true threading for a future scheduler.
2707    /// Compiled inline (mirrors `list.map`) so closure args can flow
2708    /// through `CallClosure` without a heap-allocated trampoline.
2709    fn emit_flow_parallel_list(&mut self, args: &[a::CExpr]) {
2710        // xs := actions
2711        self.compile_expr(&args[0], false);
2712        let xs = self.alloc_local("__fpl_xs");
2713        self.emit(Op::StoreLocal(xs));
2714
2715        // out := []
2716        self.emit(Op::MakeList(0));
2717        let out = self.alloc_local("__fpl_out");
2718        self.emit(Op::StoreLocal(out));
2719
2720        // i := 0
2721        let zero = self.pool.int(0);
2722        self.emit(Op::PushConst(zero));
2723        let i = self.alloc_local("__fpl_i");
2724        self.emit(Op::StoreLocal(i));
2725
2726        // loop_top: while i < len(xs) { ... }
2727        let loop_top = self.code.len();
2728        self.emit(Op::LoadLocal(i));
2729        self.emit(Op::LoadLocal(xs));
2730        self.emit(Op::GetListLen);
2731        self.emit(Op::IntLt);
2732        let j_exit = self.code.len();
2733        self.emit(Op::JumpIfNot(0));
2734
2735        // body: out := out ++ [xs[i]()]
2736        let nid = self.pool.node_id("n_flow_parallel_list");
2737        self.emit(Op::LoadLocal(out));
2738        self.emit(Op::LoadLocal(xs));
2739        self.emit(Op::LoadLocal(i));
2740        self.emit(Op::GetListElemDyn);
2741        self.emit(Op::CallClosure { arity: 0, node_id_idx: nid });
2742        self.emit(Op::ListAppend);
2743        self.emit(Op::StoreLocal(out));
2744
2745        // i := i + 1
2746        self.emit(Op::LoadLocal(i));
2747        let one = self.pool.int(1);
2748        self.emit(Op::PushConst(one));
2749        self.emit(Op::IntAdd);
2750        self.emit(Op::StoreLocal(i));
2751
2752        // jump back
2753        let jump_back = self.code.len();
2754        let back = (loop_top as i32) - (jump_back as i32 + 1);
2755        self.emit(Op::Jump(back));
2756
2757        // exit: patch j_exit, push out
2758        let exit_target = self.code.len() as i32;
2759        if let Op::JumpIfNot(off) = &mut self.code[j_exit] {
2760            *off = exit_target - (j_exit as i32 + 1);
2761        }
2762        self.emit(Op::LoadLocal(out));
2763    }
2764
2765    /// `flow.branch(cond, t, f)` returns a closure `(x) -> if cond(x) then t(x) else f(x)`.
2766    fn emit_flow_branch(&mut self, args: &[a::CExpr]) {
2767        self.compile_expr(&args[0], false);
2768        self.compile_expr(&args[1], false);
2769        self.compile_expr(&args[2], false);
2770        let nid = self.pool.node_id("n_flow_branch");
2771        let mut code = vec![
2772            // Locals: [cond=0, t=1, f=2, x=3]
2773            Op::LoadLocal(0),                               // push cond
2774            Op::LoadLocal(3),                               // push x
2775            Op::CallClosure { arity: 1, node_id_idx: nid }, // bool
2776        ];
2777        let j_false = code.len();
2778        code.push(Op::JumpIfNot(0));                        // patched
2779        // true arm: t(x)
2780        code.push(Op::LoadLocal(1));
2781        code.push(Op::LoadLocal(3));
2782        code.push(Op::CallClosure { arity: 1, node_id_idx: nid });
2783        code.push(Op::Return);
2784        // false arm
2785        let false_target = code.len() as i32;
2786        if let Op::JumpIfNot(off) = &mut code[j_false] {
2787            *off = false_target - (j_false as i32 + 1);
2788        }
2789        code.push(Op::LoadLocal(2));
2790        code.push(Op::LoadLocal(3));
2791        code.push(Op::CallClosure { arity: 1, node_id_idx: nid });
2792        code.push(Op::Return);
2793
2794        let fn_id = self.install_trampoline("__flow_branch", 4, 4, code);
2795        self.emit(Op::MakeClosure { fn_id, capture_count: 3 });
2796    }
2797
2798    /// `flow.retry(f, max_attempts)` returns a closure `(x) -> Result[U, E]`
2799    /// that calls `f(x)` up to `max_attempts` times, returning the first
2800    /// `Ok` or the final `Err`.
2801    fn emit_flow_retry(&mut self, args: &[a::CExpr]) {
2802        self.compile_expr(&args[0], false);
2803        self.compile_expr(&args[1], false);
2804        let call_nid = self.pool.node_id("n_flow_retry");
2805        let ok_idx = self.pool.variant("Ok");
2806        let zero_const = self.pool.int(0);
2807        let one_const = self.pool.int(1);
2808        // Locals: [f=0, max=1, x=2, i=3, last=4]
2809        let mut code = vec![
2810            // i := 0
2811            Op::PushConst(zero_const),
2812            Op::StoreLocal(3),
2813        ];
2814        // loop_top: while i < max
2815        let loop_top = code.len() as i32;
2816        code.push(Op::LoadLocal(3));
2817        code.push(Op::LoadLocal(1));
2818        code.push(Op::IntLt);
2819        let j_done = code.len();
2820        code.push(Op::JumpIfNot(0));                       // patched
2821
2822        // body: r := f(x); last := r
2823        code.push(Op::LoadLocal(0));
2824        code.push(Op::LoadLocal(2));
2825        code.push(Op::CallClosure { arity: 1, node_id_idx: call_nid });
2826        code.push(Op::StoreLocal(4));
2827
2828        // Test variant Ok on last; if so, return last.
2829        code.push(Op::LoadLocal(4));
2830        code.push(Op::TestVariant(ok_idx));
2831        let j_was_err = code.len();
2832        code.push(Op::JumpIfNot(0));                       // patched: skip return
2833        code.push(Op::LoadLocal(4));
2834        code.push(Op::Return);
2835
2836        // was_err: i := i + 1; jump loop_top
2837        let was_err_target = code.len() as i32;
2838        if let Op::JumpIfNot(off) = &mut code[j_was_err] {
2839            *off = was_err_target - (j_was_err as i32 + 1);
2840        }
2841        code.push(Op::LoadLocal(3));
2842        code.push(Op::PushConst(one_const));
2843        code.push(Op::IntAdd);
2844        code.push(Op::StoreLocal(3));
2845        let pc_after_jump = code.len() as i32 + 1;
2846        code.push(Op::Jump(loop_top - pc_after_jump));
2847
2848        // done: return last (the final Err, or Unit if max=0).
2849        let done_target = code.len() as i32;
2850        if let Op::JumpIfNot(off) = &mut code[j_done] {
2851            *off = done_target - (j_done as i32 + 1);
2852        }
2853        code.push(Op::LoadLocal(4));
2854        code.push(Op::Return);
2855
2856        let fn_id = self.install_trampoline("__flow_retry", 3, 5, code);
2857        self.emit(Op::MakeClosure { fn_id, capture_count: 2 });
2858    }
2859
2860    /// `flow.retry_with_backoff(f, attempts, base_ms)` (#226). Variant
2861    /// of `flow.retry` that sleeps between attempts. The first
2862    /// attempt fires immediately; attempt k > 1 waits `base_ms *
2863    /// 2^(k-2)` ms before retrying. Sleeps go through
2864    /// `time.sleep_ms`, which is why the resulting closure carries
2865    /// `[time]` in its effect row even though the underlying `f` is
2866    /// pure.
2867    fn emit_flow_retry_with_backoff(&mut self, args: &[a::CExpr]) {
2868        // Push captures: f, max, base_ms. The trampoline takes one
2869        // call-time arg `x`, so capture_count = 3, arity = 4.
2870        self.compile_expr(&args[0], false);
2871        self.compile_expr(&args[1], false);
2872        self.compile_expr(&args[2], false);
2873        let call_nid    = self.pool.node_id("n_flow_retry_backoff");
2874        let sleep_nid   = self.pool.node_id("n_flow_retry_backoff_sleep");
2875        let kind_idx    = self.pool.str("time");
2876        let op_idx      = self.pool.str("sleep_ms");
2877        let ok_idx      = self.pool.variant("Ok");
2878        let zero_const  = self.pool.int(0);
2879        let one_const   = self.pool.int(1);
2880        let two_const   = self.pool.int(2);
2881        // Locals layout:
2882        //   0=f, 1=max, 2=base_ms (captures)
2883        //   3=x (arg)
2884        //   4=i, 5=last, 6=next_delay (working state)
2885        let mut code = vec![
2886            // next_delay := base_ms
2887            Op::LoadLocal(2),
2888            Op::StoreLocal(6),
2889            // i := 0
2890            Op::PushConst(zero_const),
2891            Op::StoreLocal(4),
2892        ];
2893
2894        let loop_top = code.len() as i32;
2895        // while i < max
2896        code.push(Op::LoadLocal(4));
2897        code.push(Op::LoadLocal(1));
2898        code.push(Op::IntLt);
2899        let j_done = code.len();
2900        code.push(Op::JumpIfNot(0)); // patched
2901
2902        // if i > 0: time.sleep_ms(next_delay); next_delay := next_delay * 2
2903        code.push(Op::PushConst(zero_const));
2904        code.push(Op::LoadLocal(4));
2905        code.push(Op::IntLt);                // 0 < i ?
2906        let j_no_sleep = code.len();
2907        code.push(Op::JumpIfNot(0));         // patched: skip the sleep block
2908        // Sleep
2909        code.push(Op::LoadLocal(6));         // arg = next_delay
2910        code.push(Op::EffectCall {
2911            kind_idx, op_idx, arity: 1, node_id_idx: sleep_nid,
2912        });
2913        code.push(Op::Pop);                  // discard the Unit result
2914        // next_delay := next_delay * 2
2915        code.push(Op::LoadLocal(6));
2916        code.push(Op::PushConst(two_const));
2917        code.push(Op::NumMul);
2918        code.push(Op::StoreLocal(6));
2919        // patch the no-sleep skip
2920        let after_sleep = code.len() as i32;
2921        if let Op::JumpIfNot(off) = &mut code[j_no_sleep] {
2922            *off = after_sleep - (j_no_sleep as i32 + 1);
2923        }
2924
2925        // last := f(x)
2926        code.push(Op::LoadLocal(0));
2927        code.push(Op::LoadLocal(3));
2928        code.push(Op::CallClosure { arity: 1, node_id_idx: call_nid });
2929        code.push(Op::StoreLocal(5));
2930
2931        // if Ok(last): return last
2932        code.push(Op::LoadLocal(5));
2933        code.push(Op::TestVariant(ok_idx));
2934        let j_was_err = code.len();
2935        code.push(Op::JumpIfNot(0)); // patched
2936        code.push(Op::LoadLocal(5));
2937        code.push(Op::Return);
2938
2939        // was_err: i := i + 1; jump loop_top
2940        let was_err_target = code.len() as i32;
2941        if let Op::JumpIfNot(off) = &mut code[j_was_err] {
2942            *off = was_err_target - (j_was_err as i32 + 1);
2943        }
2944        code.push(Op::LoadLocal(4));
2945        code.push(Op::PushConst(one_const));
2946        code.push(Op::IntAdd);
2947        code.push(Op::StoreLocal(4));
2948        let pc_after_jump = code.len() as i32 + 1;
2949        code.push(Op::Jump(loop_top - pc_after_jump));
2950
2951        // done: return last (the final Err, or Unit if max=0).
2952        let done_target = code.len() as i32;
2953        if let Op::JumpIfNot(off) = &mut code[j_done] {
2954            *off = done_target - (j_done as i32 + 1);
2955        }
2956        code.push(Op::LoadLocal(5));
2957        code.push(Op::Return);
2958
2959        let fn_id = self.install_trampoline("__flow_retry_backoff", 4, 7, code);
2960        self.emit(Op::MakeClosure { fn_id, capture_count: 3 });
2961    }
2962}
2963
2964/// Collect free variables referenced in `e` that are not in `bound`.
2965/// Mutates `bound` to track let/lambda introductions during the walk;
2966/// the caller's set is preserved on return because Rust's borrow rules
2967/// force us to clone for sub-scopes that rebind a name.
2968fn free_vars(e: &a::CExpr, bound: &mut std::collections::HashSet<String>, out: &mut Vec<String>) {
2969    match e {
2970        a::CExpr::Literal { .. } => {}
2971        a::CExpr::Var { name } => {
2972            if !bound.contains(name) && !out.contains(name) {
2973                out.push(name.clone());
2974            }
2975        }
2976        a::CExpr::Call { callee, args } => {
2977            free_vars(callee, bound, out);
2978            for a in args { free_vars(a, bound, out); }
2979        }
2980        a::CExpr::Let { name, value, body, .. } => {
2981            free_vars(value, bound, out);
2982            let was_bound = bound.contains(name);
2983            bound.insert(name.clone());
2984            free_vars(body, bound, out);
2985            if !was_bound { bound.remove(name); }
2986        }
2987        a::CExpr::Match { scrutinee, arms } => {
2988            free_vars(scrutinee, bound, out);
2989            for arm in arms {
2990                let mut local_bound = bound.clone();
2991                pattern_binders(&arm.pattern, &mut local_bound);
2992                free_vars(&arm.body, &mut local_bound, out);
2993            }
2994        }
2995        a::CExpr::Block { statements, result } => {
2996            let mut local_bound = bound.clone();
2997            for s in statements { free_vars(s, &mut local_bound, out); }
2998            free_vars(result, &mut local_bound, out);
2999        }
3000        a::CExpr::Constructor { args, .. } => {
3001            for a in args { free_vars(a, bound, out); }
3002        }
3003        a::CExpr::RecordLit { fields } => {
3004            for f in fields { free_vars(&f.value, bound, out); }
3005        }
3006        a::CExpr::TupleLit { items } | a::CExpr::ListLit { items } => {
3007            for it in items { free_vars(it, bound, out); }
3008        }
3009        a::CExpr::FieldAccess { value, .. } => free_vars(value, bound, out),
3010        a::CExpr::Lambda { params, body, .. } => {
3011            let mut inner = bound.clone();
3012            for p in params { inner.insert(p.name.clone()); }
3013            free_vars(body, &mut inner, out);
3014        }
3015        a::CExpr::BinOp { lhs, rhs, .. } => {
3016            free_vars(lhs, bound, out);
3017            free_vars(rhs, bound, out);
3018        }
3019        a::CExpr::UnaryOp { expr, .. } => free_vars(expr, bound, out),
3020        a::CExpr::Return { value } => free_vars(value, bound, out),
3021    }
3022}
3023
3024fn pattern_binders(p: &a::Pattern, bound: &mut std::collections::HashSet<String>) {
3025    match p {
3026        a::Pattern::PWild | a::Pattern::PLiteral { .. } => {}
3027        a::Pattern::PVar { name } => { bound.insert(name.clone()); }
3028        a::Pattern::PConstructor { args, .. } => {
3029            for a in args { pattern_binders(a, bound); }
3030        }
3031        a::Pattern::PRecord { fields } => {
3032            for f in fields { pattern_binders(&f.pattern, bound); }
3033        }
3034        a::Pattern::PTuple { items } => {
3035            for it in items { pattern_binders(it, bound); }
3036        }
3037    }
3038}