Skip to main content

lex_bytecode/
compiler.rs

1//! M4 compiler: canonical AST → bytecode.
2
3use crate::op::*;
4use crate::program::*;
5use indexmap::IndexMap;
6use lex_ast as a;
7
8#[derive(Default)]
9struct ConstPool {
10    pool: Vec<Const>,
11    fields: IndexMap<String, u32>,
12    variants: IndexMap<String, u32>,
13    node_ids: IndexMap<String, u32>,
14    ints: IndexMap<i64, u32>,
15    bools: IndexMap<u8, u32>,
16    strs: IndexMap<String, u32>,
17    /// Interned record field-name shapes (#461). Deduplicated by content
18    /// so a record literal with the same field-name layout reuses the
19    /// same `shape_idx` across the whole program — keeps the side-table
20    /// small even when the same struct is constructed in many places.
21    record_shapes: Vec<Vec<u32>>,
22    record_shape_dedup: IndexMap<Vec<u32>, u32>,
23}
24
25impl ConstPool {
26    fn field(&mut self, name: &str) -> u32 {
27        if let Some(i) = self.fields.get(name) { return *i; }
28        let i = self.pool.len() as u32;
29        self.pool.push(Const::FieldName(name.into()));
30        self.fields.insert(name.into(), i);
31        i
32    }
33    fn variant(&mut self, name: &str) -> u32 {
34        if let Some(i) = self.variants.get(name) { return *i; }
35        let i = self.pool.len() as u32;
36        self.pool.push(Const::VariantName(name.into()));
37        self.variants.insert(name.into(), i);
38        i
39    }
40    fn node_id(&mut self, name: &str) -> u32 {
41        if let Some(i) = self.node_ids.get(name) { return *i; }
42        let i = self.pool.len() as u32;
43        self.pool.push(Const::NodeId(name.into()));
44        self.node_ids.insert(name.into(), i);
45        i
46    }
47    fn int(&mut self, n: i64) -> u32 {
48        if let Some(i) = self.ints.get(&n) { return *i; }
49        let i = self.pool.len() as u32;
50        self.pool.push(Const::Int(n));
51        self.ints.insert(n, i);
52        i
53    }
54    fn bool(&mut self, b: bool) -> u32 {
55        let key = b as u8;
56        if let Some(i) = self.bools.get(&key) { return *i; }
57        let i = self.pool.len() as u32;
58        self.pool.push(Const::Bool(b));
59        self.bools.insert(key, i);
60        i
61    }
62    fn str(&mut self, s: &str) -> u32 {
63        if let Some(i) = self.strs.get(s) { return *i; }
64        let i = self.pool.len() as u32;
65        self.pool.push(Const::Str(s.into()));
66        self.strs.insert(s.into(), i);
67        i
68    }
69    fn float(&mut self, f: f64) -> u32 {
70        // Floats: not deduped (NaN issues).
71        let i = self.pool.len() as u32;
72        self.pool.push(Const::Float(f));
73        i
74    }
75    fn unit(&mut self) -> u32 {
76        let i = self.pool.len() as u32;
77        self.pool.push(Const::Unit);
78        i
79    }
80
81    /// Intern a record field-name shape (#461). Returns the index into
82    /// `record_shapes`; identical shapes (same field-name-index vec)
83    /// always return the same index.
84    fn record_shape(&mut self, idxs: Vec<u32>) -> u32 {
85        if let Some(i) = self.record_shape_dedup.get(&idxs) {
86            return *i;
87        }
88        let i = self.record_shapes.len() as u32;
89        self.record_shape_dedup.insert(idxs.clone(), i);
90        self.record_shapes.push(idxs);
91        i
92    }
93}
94
95pub fn compile_program(stages: &[a::Stage]) -> Program {
96    let mut p = Program {
97        constants: Vec::new(),
98        functions: Vec::new(),
99        function_names: IndexMap::new(),
100        module_aliases: IndexMap::new(),
101        entry: None,
102        record_shapes: Vec::new(),
103    };
104
105    // Collect imports as alias → module-name. The module name is the part
106    // after `std.` (so `import "std.io" as io` ⇒ alias `io` → module `io`).
107    for s in stages {
108        if let a::Stage::Import(i) = s {
109            let module = i.reference.strip_prefix("std.").unwrap_or(&i.reference).to_string();
110            p.module_aliases.insert(i.alias.clone(), module);
111        }
112    }
113
114    for s in stages {
115        if let a::Stage::FnDecl(fd) = s {
116            let idx = p.functions.len() as u32;
117            p.function_names.insert(fd.name.clone(), idx);
118            p.functions.push(Function {
119                name: fd.name.clone(),
120                arity: fd.params.len() as u16,
121                locals_count: 0,
122                code: Vec::new(),
123                effects: fd.effects.iter().map(|e| DeclaredEffect {
124                    kind: e.name.clone(),
125                    arg: e.arg.as_ref().map(|a| match a {
126                        a::EffectArg::Str { value } => EffectArg::Str(value.clone()),
127                        a::EffectArg::Int { value } => EffectArg::Int(*value),
128                        a::EffectArg::Ident { value } => EffectArg::Ident(value.clone()),
129                    }),
130                }).collect(),
131                // Filled in at the end of the compile pass, once `code`
132                // and `locals_count` are final. See #222.
133                body_hash: crate::program::ZERO_BODY_HASH,
134                // Per-param refinement predicates for runtime check
135                // (#209 slice 3). Lifted directly from each param's
136                // `TypeExpr::Refined` if present; `None` otherwise.
137                refinements: fd.params.iter().map(|p| match &p.ty {
138                    a::TypeExpr::Refined { binding, predicate, .. } =>
139                        Some(crate::program::Refinement {
140                            binding: binding.clone(),
141                            predicate: (**predicate).clone(),
142                        }),
143                    _ => None,
144                }).collect(),
145                // Filled in below once the FnCompiler counts emit sites.
146                field_ic_sites: 0,
147            });
148        }
149    }
150
151    let mut pool = ConstPool::default();
152    let function_names = p.function_names.clone();
153    let module_aliases = p.module_aliases.clone();
154    let mut pending_lambdas: Vec<PendingLambda> = Vec::new();
155    // #461 slice 7: collect `type Foo = { ... }` aliases so
156    // `record_field_types` can resolve a parameter's named record
157    // type to its field layout. Without this, `r :: R` where
158    // `type R = { x :: Int, y :: Int }` falls through to Unknown
159    // and the typed-Add lowering misses on `r.x + r.y`.
160    let mut type_aliases: IndexMap<String, a::TypeExpr> = IndexMap::new();
161    for s in stages {
162        if let a::Stage::TypeDecl(td) = s {
163            // Parameterized type aliases (`type Box[T] = ...`) are
164            // out of scope for this slice — without monomorphization
165            // we can't know what T resolves to. Skip them.
166            if td.params.is_empty() {
167                type_aliases.insert(td.name.clone(), td.definition.clone());
168            }
169        }
170    }
171
172    for s in stages {
173        if let a::Stage::FnDecl(_) = s {
174            // Build a NodeId map for *this* stage so the compiler can stamp
175            // each Call/EffectCall opcode with the originating AST node.
176            let id_map = lex_ast::expr_ids(s);
177            let fd = match s { a::Stage::FnDecl(fd) => fd, _ => unreachable!() };
178            let mut fc = FnCompiler {
179                code: Vec::new(),
180                locals: IndexMap::new(),
181                next_local: 0,
182                peak_local: 0,
183                local_types: IndexMap::new(),
184                local_record_field_types: IndexMap::new(),
185                field_get_sites: 0,
186                pool: &mut pool,
187                function_names: &function_names,
188                module_aliases: &module_aliases,
189                id_map: &id_map,
190                pending_lambdas: &mut pending_lambdas,
191                next_fn_id: &mut p.functions,
192            };
193            for param in &fd.params {
194                let i = fc.next_local;
195                fc.locals.insert(param.name.clone(), i);
196                fc.local_types.insert(param.name.clone(), classify_type_expr(&param.ty));
197                // #461 slice 7: inline-record parameter (`r ::
198                // { x :: Int, y :: Int }`) — populate the per-local
199                // field-type map so `r.x + r.y` classifies as
200                // Int+Int → IntAdd, which slice 7 then fuses.
201                if let Some(ftypes) = record_field_types(&param.ty, &type_aliases) {
202                    fc.local_record_field_types.insert(param.name.clone(), ftypes);
203                }
204                fc.next_local += 1;
205                fc.peak_local = fc.next_local;
206            }
207            fc.compile_expr(&fd.body, true);
208            fc.code.push(Op::Return);
209            let code = std::mem::take(&mut fc.code);
210            let peak = fc.peak_local;
211            let field_sites = fc.field_get_sites as u16;
212            drop(fc);
213            let idx = function_names[&fd.name];
214            p.functions[idx as usize].code = code;
215            p.functions[idx as usize].field_ic_sites = field_sites;
216            p.functions[idx as usize].locals_count = peak;
217        }
218    }
219
220    // Compile pending lambdas in FIFO order. Each lambda may emit further
221    // lambdas; loop until the queue drains.
222    while let Some(pl) = pending_lambdas.pop() {
223        let id_map = std::collections::HashMap::new();
224        let mut fc = FnCompiler {
225            code: Vec::new(),
226            locals: IndexMap::new(),
227            next_local: 0,
228            peak_local: 0,
229            local_types: IndexMap::new(),
230            local_record_field_types: IndexMap::new(),
231            field_get_sites: 0,
232            pool: &mut pool,
233            function_names: &function_names,
234            module_aliases: &module_aliases,
235            id_map: &id_map,
236            pending_lambdas: &mut pending_lambdas,
237            next_fn_id: &mut p.functions,
238        };
239        for name in &pl.capture_names {
240            let i = fc.next_local;
241            fc.locals.insert(name.clone(), i);
242            // Captures' static types aren't known at this layer
243            // — the closure's environment carries them dynamically.
244            // Conservative fallback; binop lowering stays correct
245            // because Unknown classifies through to NumAdd.
246            fc.local_types.insert(name.clone(), NumTy::Unknown);
247            fc.next_local += 1;
248            fc.peak_local = fc.next_local;
249        }
250        for p in &pl.params {
251            let i = fc.next_local;
252            fc.locals.insert(p.name.clone(), i);
253            fc.local_types.insert(p.name.clone(), classify_type_expr(&p.ty));
254            fc.next_local += 1;
255            fc.peak_local = fc.next_local;
256        }
257        fc.compile_expr(&pl.body, true);
258        fc.code.push(Op::Return);
259        let code = std::mem::take(&mut fc.code);
260        let peak = fc.peak_local;
261        let field_sites = fc.field_get_sites as u16;
262        drop(fc);
263        p.functions[pl.fn_id as usize].code = code;
264        p.functions[pl.fn_id as usize].field_ic_sites = field_sites;
265        p.functions[pl.fn_id as usize].locals_count = peak;
266    }
267
268    // #464 step 2: escape-analysis-driven lowering. Rewrites
269    // `MakeRecord` at non-escaping sites to `AllocStackRecord`, which
270    // the VM allocates in the frame's stack-record arena instead of
271    // on the heap. Runs on raw bytecode (before the peephole passes)
272    // so the escape analysis — which itself walks raw bytecode — sees
273    // exactly the program it was designed for.
274    //
275    // The peephole passes that follow do not match on MakeRecord /
276    // AllocStackRecord, so swapping one for the other doesn't disturb
277    // any pattern. `compute_body_hash` lowers AllocStackRecord back
278    // to the legacy MakeRecord form (#222), so closure identity is
279    // invariant under this lowering.
280    //
281    // Escape hatch: `LEX_NO_STACK_RECORDS=1` skips the lowering
282    // entirely (#464 step 3). The flag exists so the bench can A/B
283    // the same source under matched VM/peephole conditions; in
284    // production code the pass always runs.
285    if std::env::var_os("LEX_NO_STACK_RECORDS").is_none() {
286        let escape_index = crate::escape::build_escape_index(&p.functions);
287        for f in p.functions.iter_mut() {
288            apply_escape_lowering(&mut f.code, &f.name, &escape_index);
289        }
290    }
291
292    // #463 slice 2b-i: arena-eligibility lowering. Runs **after**
293    // `apply_escape_lowering` and targets the remaining `MakeRecord`
294    // / `MakeTuple` sites — those the stack pass left alone because
295    // they cross the frame boundary, but the request-scope analysis
296    // proves they stay inside the active `EffectHandler` arena
297    // scope. The two passes form a three-tier allocation hierarchy:
298    //
299    //   frame-local        → AllocStackRecord  (#464, cheapest)
300    //   request-local      → AllocArenaRecord  (#463, this slice)
301    //   escapes request    → MakeRecord        (heap, status quo)
302    //
303    // Order matters: a site that fits the stack tier should land
304    // there (cheapest), so the stack pass runs first. The arena
305    // pass's match doesn't fire on AllocStackRecord, so already-
306    // stack-lowered sites stay stack-lowered. Sites that escape the
307    // frame and the request both pass through unchanged.
308    //
309    // Escape hatch: `LEX_NO_ARENA_RECORDS=1` skips the lowering,
310    // mirroring `LEX_NO_STACK_RECORDS`. The slice-2b-i bench uses
311    // this to A/B identical source under matched VM conditions.
312    //
313    // `body_hash` invariance: `compute_body_hash` decodes
314    // `AllocArenaRecord` / `AllocArenaTuple` back to their legacy
315    // `MakeRecord` / `MakeTuple` form, so closure identity (#222) is
316    // bit-identical across this and the stack lowering.
317    if std::env::var_os("LEX_NO_ARENA_RECORDS").is_none() {
318        let arena_index = crate::arena::build_arena_index(&p.functions);
319        for f in p.functions.iter_mut() {
320            apply_arena_lowering(&mut f.code, &f.name, &arena_index);
321        }
322    }
323
324    // Peephole pass (#461 superinstructions). Rewrites fusable opcode
325    // patterns into single dispatch steps. Runs before `body_hash`
326    // computation, but `compute_body_hash` decomposes each fused op
327    // back to its primitive form on hash — so closure identity (#222)
328    // is invariant under this pass and the order doesn't matter.
329    //
330    // Slices run sequentially: slice 2 looks for slice-1 output
331    // followed by a StoreLocal, so it must follow slice 1. Slice 3
332    // (LoadLocal + LoadLocal + IntAdd) is disjoint from both — its
333    // second slot is LoadLocal, not PushConst — so it can run in
334    // either order. Run it last to keep the slice 1/2 contract
335    // (slice 2 expects to see slice-1 output) untouched. Slice 4 is
336    // slice 3 for IntSub / IntMul (same pattern, different terminator);
337    // disjoint from every prior slice because the terminator op
338    // disambiguates, so order between slice 3 and slice 4 is free.
339    for f in p.functions.iter_mut() {
340        apply_peephole(&mut f.code, &pool.pool);
341        apply_peephole_slice2(&mut f.code);
342        apply_peephole_slice3(&mut f.code);
343        apply_peephole_slice4(&mut f.code);
344        // Slice 5 — jump-aware fusion of the loop-condition idiom
345        // (LoadLocal + LoadLocal/PushConst + IntLt + JumpIfNot).
346        // Runs after slices 3/4 because their 3-slot windows
347        // overlap slice 5's 4-slot window at position 0 and 1; if
348        // slice 3 fired first and consumed `LoadLocal + LoadLocal +
349        // IntAdd`, the `IntLt + JumpIfNot` that follows would not
350        // be a fusion candidate. Since slice 3's terminator is
351        // `IntAdd` and slice 5's is `IntLt`, the two don't compete
352        // on the same site — order between them is technically free
353        // but conventionally slice N runs after slice N-1.
354        apply_peephole_slice5(&mut f.code, &pool.pool);
355        // Slice 6 — absorb the match-scrutinee dance
356        // (`LoadLocal + StoreLocal` immediately preceding a slice-5
357        // fused op that reads the just-stored local). Must run after
358        // slice 5 since it matches on slice 5's output.
359        apply_peephole_slice6(&mut f.code);
360        // Slice 7/8 — fuse `LoadLocal + GetField + IntAdd|IntSub|IntMul`,
361        // the accumulator-with-field-read idiom. Disjoint from every
362        // earlier slice (only this one matches a GetField at slot 1),
363        // so order is independent — placed near the end for chronology.
364        apply_peephole_slice7(&mut f.code);
365        // Slice 9 — fuse the *remaining* bare `LoadLocal + GetField`
366        // pairs (those slice 7/8 didn't consume): chain-head field
367        // reads (`r.x` in `r.x + r.y`), standalone `r.field` reads
368        // (`r.total`), and field reads feeding non-add/sub/mul ops.
369        // MUST run after slice 7/8 — otherwise it would greedily eat
370        // the `LoadLocal + GetField` prefix of an `acc OP r.field`
371        // triple and prevent the 3-op fusion. Slice 7/8's tombstone
372        // GetFields are preceded by their fused op (not a bare
373        // LoadLocal), so slice 9 never matches them.
374        apply_peephole_slice9(&mut f.code);
375    }
376
377    // Final pass: stamp every function with its content hash now that
378    // every body is finalized (#222). Trampolines installed via
379    // `install_trampoline` already have it; recomputing is cheap and
380    // makes the invariant easier to read at this top level.
381    for f in p.functions.iter_mut() {
382        if f.body_hash == crate::program::ZERO_BODY_HASH {
383            f.body_hash = crate::program::compute_body_hash(
384                f.arity, f.locals_count, &f.code, &pool.record_shapes);
385        }
386    }
387
388    p.constants = pool.pool;
389    p.record_shapes = pool.record_shapes;
390    p
391}
392
393/// Peephole pass: rewrite fusable opcode patterns into superinstructions
394/// (#461). Each fused op claims its own slot in the code stream; the
395/// trailing primitive ops it absorbs stay in place as inert
396/// "tombstones" — the dispatch loop overrides its default `pc += 1`
397/// to step past them. Leaving the tombstones in place keeps
398/// `code.len()` invariant and means we don't have to renumber jump
399/// offsets.
400///
401/// Pattern (slice 1): `LoadLocal(i), PushConst(c), IntAdd` where
402/// `constants[c]` is a `Const::Int`. Fused to
403/// `LoadLocalAddIntConst { local_idx: i, imm_const_idx: c }`.
404/// Safety: the second and third slots must not be reachable from
405/// any Jump / JumpIf / JumpIfNot — otherwise a jump would land on a
406/// tombstone instead of the live op the source intended. The
407/// pre-pass below collects every jump target in the function and
408/// skips fusion sites whose tombstones overlap one.
409/// #464 step 2 — rewrite `MakeRecord` to `AllocStackRecord` at sites
410/// the escape analysis (`crate::escape::build_escape_index`) proved
411/// non-escaping. Each rewrite is a single-slot swap that preserves
412/// pc, stack delta, and shape semantics — jump targets, the peephole
413/// passes downstream, and the body-hash decoder all see the same
414/// program shape they would have seen for the unlowered code.
415///
416/// Sites that escape are left as-is and still incur the
417/// IndexMap-backed heap allocation. Step 3 of #464 carries the
418/// bench acceptance bars (≥1.5× speedup on `response_build`); this
419/// pass is the precondition.
420fn apply_escape_lowering(
421    code: &mut [Op],
422    fn_name: &str,
423    escape_index: &std::collections::HashMap<(String, u32), bool>,
424) {
425    for (pc, op) in code.iter_mut().enumerate() {
426        // Look up this (fn, pc) in the escape index. Absent → analysis
427        // didn't observe the site (defensive: leave on heap path).
428        // Present and false → safe to stack-allocate. Each rewrite is a
429        // single-slot swap preserving pc / stack delta, so jump
430        // targets, downstream peephole passes, and the body-hash
431        // decoder all see the same program shape.
432        let key = (fn_name.to_string(), pc as u32);
433        if !matches!(escape_index.get(&key), Some(false)) {
434            continue;
435        }
436        match *op {
437            Op::MakeRecord { shape_idx, field_count } => {
438                *op = Op::AllocStackRecord { shape_idx, field_count };
439            }
440            // #464 tuple codegen: same single-slot swap as records.
441            Op::MakeTuple(arity) => {
442                *op = Op::AllocStackTuple { arity };
443            }
444            _ => {}
445        }
446    }
447}
448
449/// #463 slice 2b-i — rewrite `MakeRecord` / `MakeTuple` to the arena
450/// variants at sites the request-scope analysis
451/// (`crate::arena::build_arena_index`) proved do not escape the
452/// active `EffectHandler` arena scope.
453///
454/// Only fires on **remaining** `MakeRecord` / `MakeTuple` sites — the
455/// stack pass (`apply_escape_lowering`) runs first and converts the
456/// non-frame-escaping cheaper-tier sites. Sites that escape both the
457/// frame *and* the request stay as `MakeRecord` / `MakeTuple` (heap),
458/// untouched.
459///
460/// Each rewrite is the same single-slot swap as the stack lowering:
461/// pc / stack delta / shape semantics preserved, jump targets and
462/// downstream peephole passes see the same program shape, and
463/// `compute_body_hash` (#222) decodes both arena ops back to their
464/// legacy `MakeRecord` / `MakeTuple` form so closure identity is
465/// invariant.
466fn apply_arena_lowering(
467    code: &mut [Op],
468    fn_name: &str,
469    arena_index: &std::collections::HashMap<(String, u32), bool>,
470) {
471    for (pc, op) in code.iter_mut().enumerate() {
472        // arena_index value: true = arena-eligible. Absent or false
473        // → leave on heap (defensive default; absent means the
474        // analysis didn't observe the site).
475        let key = (fn_name.to_string(), pc as u32);
476        if !matches!(arena_index.get(&key), Some(true)) {
477            continue;
478        }
479        match *op {
480            Op::MakeRecord { shape_idx, field_count } => {
481                *op = Op::AllocArenaRecord { shape_idx, field_count };
482            }
483            Op::MakeTuple(arity) => {
484                *op = Op::AllocArenaTuple { arity };
485            }
486            _ => {}
487        }
488    }
489}
490
491fn apply_peephole(code: &mut [Op], constants: &[Const]) {
492    if code.len() < 3 { return; }
493    let jump_targets = collect_jump_targets(code);
494
495    let n = code.len();
496    let mut k = 0;
497    while k + 2 < n {
498        if let (Op::LoadLocal(local_idx), Op::PushConst(imm_const_idx), Op::IntAdd)
499            = (code[k], code[k + 1], code[k + 2])
500        {
501            let imm_is_int = matches!(
502                constants.get(imm_const_idx as usize),
503                Some(Const::Int(_))
504            );
505            // Tombstones at k+1 and k+2 must not be jump targets;
506            // k itself can be a target (it stays a live op — the
507            // fused form executes the same semantics in one step).
508            let safe = imm_is_int
509                && !jump_targets.contains(&(k + 1))
510                && !jump_targets.contains(&(k + 2));
511            if safe {
512                code[k] = Op::LoadLocalAddIntConst { local_idx, imm_const_idx };
513                k += 3;
514                continue;
515            }
516        }
517        k += 1;
518    }
519}
520
521/// Slice 2: fuse `[LoadLocalAddIntConst, _, _, StoreLocal(dest)]`
522/// into `LoadLocalAddIntConstStoreLocal { src, imm_const_idx, dest }`.
523/// The two `_` slots are slice-1 tombstones (the original PushConst
524/// and IntAdd) and stay in place as slice-2 tombstones too. The
525/// dispatch loop advances pc by 4 past all three trailing slots
526/// after executing the fused op.
527fn apply_peephole_slice2(code: &mut [Op]) {
528    if code.len() < 4 { return; }
529    let jump_targets = collect_jump_targets(code);
530
531    let n = code.len();
532    let mut k = 0;
533    while k + 3 < n {
534        if let (
535            Op::LoadLocalAddIntConst { local_idx: src, imm_const_idx },
536            _,
537            _,
538            Op::StoreLocal(dest),
539        ) = (code[k], code[k + 1], code[k + 2], code[k + 3])
540        {
541            // Slice-1 contract: code[k+1] is the original
542            // PushConst(imm_const_idx) and code[k+2] is the
543            // original IntAdd. We don't re-verify those — slice 1
544            // is the only producer of LoadLocalAddIntConst and
545            // always leaves the contract intact.
546            //
547            // Safety: tombstones at k+1..k+3 must not be reachable
548            // from any jump. k itself can be (it's still a live
549            // op carrying the same semantics).
550            let safe = !jump_targets.contains(&(k + 1))
551                && !jump_targets.contains(&(k + 2))
552                && !jump_targets.contains(&(k + 3));
553            if safe {
554                code[k] = Op::LoadLocalAddIntConstStoreLocal {
555                    src,
556                    imm_const_idx,
557                    dest,
558                };
559                k += 4;
560                continue;
561            }
562        }
563        k += 1;
564    }
565}
566
567/// Slice 3: fuse `[LoadLocal(lhs), LoadLocal(rhs), IntAdd]` into
568/// `LoadLocalAddLocal { lhs_idx, rhs_idx }`. The binary-op-on-two-
569/// locals idiom: any `a + b` where both operands compile to a
570/// `LoadLocal` (typed `Int`). Mirrors slice 1's shape exactly — the
571/// trailing `LoadLocal` + `IntAdd` stay in place as inert tombstones
572/// with cancelling stack deltas (+1, -1), so the verifier and
573/// body-hash decoder both keep walking them as live.
574///
575/// Disjoint from slice 1: the second slot disambiguates (LoadLocal
576/// vs PushConst), so a site can match at most one of the two. Runs
577/// after slice 2 so we don't accidentally consume a `LoadLocal` slot
578/// that slice 2 was about to fuse into a `*StoreLocal` superop (and
579/// to keep slice 2's input contract — slice-1 output followed by
580/// StoreLocal — untouched).
581///
582/// Safety: like slice 1, the trailing two slots must not be jump
583/// targets. The first slot can be a target (it stays a live op).
584fn apply_peephole_slice3(code: &mut [Op]) {
585    if code.len() < 3 { return; }
586    let jump_targets = collect_jump_targets(code);
587
588    let n = code.len();
589    let mut k = 0;
590    while k + 2 < n {
591        if let (Op::LoadLocal(lhs_idx), Op::LoadLocal(rhs_idx), Op::IntAdd)
592            = (code[k], code[k + 1], code[k + 2])
593        {
594            let safe = !jump_targets.contains(&(k + 1))
595                && !jump_targets.contains(&(k + 2));
596            if safe {
597                code[k] = Op::LoadLocalAddLocal { lhs_idx, rhs_idx };
598                k += 3;
599                continue;
600            }
601        }
602        k += 1;
603    }
604}
605
606/// Slice 4: slice 3 for `IntSub` and `IntMul`. Fuses
607/// `[LoadLocal(lhs), LoadLocal(rhs), IntSub]` to
608/// `LoadLocalSubLocal { lhs_idx, rhs_idx }` and the `IntMul` shape
609/// to `LoadLocalMulLocal`. Same tombstone, jump-safety, and
610/// body-hash story as slice 3 — the trailing two slots stay as
611/// inert primitives with cancelling stack deltas.
612///
613/// Disjoint from every prior slice: slice 1/2 require a `PushConst`
614/// at slot 2 (here it's `LoadLocal`), and slice 3's terminator is
615/// `IntAdd` (here it's `IntSub` / `IntMul`). A given site matches at
616/// most one slice.
617fn apply_peephole_slice4(code: &mut [Op]) {
618    if code.len() < 3 { return; }
619    let jump_targets = collect_jump_targets(code);
620
621    let n = code.len();
622    let mut k = 0;
623    while k + 2 < n {
624        if let (Op::LoadLocal(lhs_idx), Op::LoadLocal(rhs_idx), terminator)
625            = (code[k], code[k + 1], code[k + 2])
626        {
627            let fused = match terminator {
628                Op::IntSub => Some(Op::LoadLocalSubLocal { lhs_idx, rhs_idx }),
629                Op::IntMul => Some(Op::LoadLocalMulLocal { lhs_idx, rhs_idx }),
630                _ => None,
631            };
632            if let Some(fused_op) = fused {
633                let safe = !jump_targets.contains(&(k + 1))
634                    && !jump_targets.contains(&(k + 2));
635                if safe {
636                    code[k] = fused_op;
637                    k += 3;
638                    continue;
639                }
640            }
641        }
642        k += 1;
643    }
644}
645
646/// Slice 5: fuse the loop-condition idiom — 4-slot window
647/// `[LoadLocal, LoadLocal|PushConst, IntLt, JumpIfNot(offset)]` —
648/// into `LoadLocalLtLocalJumpIfNot` or `LoadLocalLtIntConstJumpIfNot`.
649/// First jump-aware peephole in this codebase: the fused op carries
650/// the JumpIfNot's offset and the VM dispatches directly to either
651/// `pc + 4` (condition true, fall through past tombstones) or
652/// `pc + 4 + offset` (condition false, original JumpIfNot target).
653///
654/// Safety conditions, on top of slice 1's "tombstones must not be
655/// jump targets":
656/// 1. Trailing 3 slots (k+1, k+2, k+3) must not be jump targets from
657///    elsewhere — same as slice 1/3/4, just three of them.
658/// 2. The slot at k+3 (JumpIfNot) is the one whose offset we copy
659///    into the fused op. The offset is relative to the JumpIfNot's
660///    `pc + 1` which equals `k + 4`, so the resolved target is
661///    `k + 4 + offset` — that target must be safe to land on (it
662///    already is, since JumpIfNot is operating as designed).
663/// 3. The const-int branch checks the PushConst points at a
664///    `Const::Int` — same as slice 1.
665fn apply_peephole_slice5(code: &mut [Op], constants: &[Const]) {
666    if code.len() < 4 { return; }
667    let jump_targets = collect_jump_targets(code);
668
669    let n = code.len();
670    let mut k = 0;
671    while k + 3 < n {
672        // Match the lhs slot — always a LoadLocal.
673        let lhs_idx = match code[k] {
674            Op::LoadLocal(i) => i,
675            _ => { k += 1; continue; }
676        };
677        // Match the rhs slot — either LoadLocal or PushConst(Int).
678        // The two flavors emit different fused ops.
679        let fused = match (code[k + 1], code[k + 2], code[k + 3]) {
680            (Op::PushConst(imm_const_idx), Op::IntEq, Op::JumpIfNot(jump_offset))
681                if matches!(constants.get(imm_const_idx as usize), Some(Const::Int(_))) =>
682                Some(Op::LoadLocalEqIntConstJumpIfNot {
683                    local_idx: lhs_idx, imm_const_idx, jump_offset,
684                }),
685            _ => None,
686        };
687        if let Some(fused_op) = fused {
688            let safe = !jump_targets.contains(&(k + 1))
689                && !jump_targets.contains(&(k + 2))
690                && !jump_targets.contains(&(k + 3));
691            if safe {
692                code[k] = fused_op;
693                k += 4;
694                continue;
695            }
696        }
697        k += 1;
698    }
699}
700
701/// Slice 6: fuse the match-scrutinee dance preceding a slice-5
702/// pattern-match arm test. 3-slot window
703/// `[LoadLocal(src), StoreLocal(dst),
704///   LoadLocalEqIntConstJumpIfNot { local_idx: dst, ... }]` —
705/// where the slice-5 op's `local_idx` matches the StoreLocal's
706/// destination — rewrites to
707/// `LoadLocalStoreEqIntConstJumpIfNot { src, dst, ... }` at slot k.
708/// The fused op carries `dst` so it can mirror the original
709/// StoreLocal (later arm tests in the same match keep reading
710/// `locals[dst]`).
711///
712/// Trailing tombstones: 5 slots (the original StoreLocal + the
713/// slice-5 fused op itself + slice 5's 3 primitive tombstones).
714/// VM dispatch skips them via `pc + 6`; verifier override pushes
715/// `(pc + 6, ...)` and the branch target `(pc + 6 + jump_offset, ...)`
716/// — the offset is identical to what slice 5 stored (still relative
717/// to the original JumpIfNot's `pc + 1`, now at `k + 5 + 1 = k + 6`).
718///
719/// Safety: slots k+1..=k+5 must not be jump targets — same window
720/// safety as the other slices. Slice 5 already verified k+3..=k+5
721/// weren't jump targets when it fused; slice 6 only needs to re-check
722/// k+1 (the StoreLocal) and k+2 (the slice-5 fused op).
723fn apply_peephole_slice6(code: &mut [Op]) {
724    if code.len() < 3 { return; }
725    let jump_targets = collect_jump_targets(code);
726
727    let n = code.len();
728    let mut k = 0;
729    while k + 2 < n {
730        if let (
731            Op::LoadLocal(src),
732            Op::StoreLocal(dst),
733            Op::LoadLocalEqIntConstJumpIfNot { local_idx, imm_const_idx, jump_offset },
734        ) = (code[k], code[k + 1], code[k + 2]) {
735            // The slice-5 op must read the very local the StoreLocal
736            // just wrote; if it reads some other local this isn't the
737            // match-scrutinee idiom (could be a coincidental sequence).
738            if local_idx == dst {
739                let safe = !jump_targets.contains(&(k + 1))
740                    && !jump_targets.contains(&(k + 2));
741                if safe {
742                    code[k] = Op::LoadLocalStoreEqIntConstJumpIfNot {
743                        src, dst, imm_const_idx, jump_offset,
744                    };
745                    // Skip past this slice-6 window. The slice-5
746                    // tombstones at k+3..=k+5 are already handled by
747                    // slice 5's earlier rewrite; we don't need to
748                    // touch them.
749                    k += 3;
750                    continue;
751                }
752            }
753        }
754        k += 1;
755    }
756}
757
758/// Slice 7/8: fuse `[LoadLocal(local_idx), GetField{name_idx,
759/// site_idx}, IntAdd|IntSub|IntMul]` into the matching
760/// `LoadLocalGetField{Add,Sub,Mul} { local_idx, name_idx, site_idx }`.
761///
762/// Fires on the `acc OP r.field` accumulator-with-field-read idiom —
763/// the bytecode the compiler emits for `prev_expr OP record.field`
764/// once `prev_expr` is on the stack. Common in handler-shaped code
765/// like `r.x + r.y + r.z` (the LHS of each operator after the first
766/// matches this pattern), `acc + items[i].weight` reductions, and
767/// the `v.l - v.m` / `v.h * v.k` mixes the `response_build` profile
768/// exercises.
769///
770/// Disjoint from every prior slice: slice 1 wants `PushConst` at
771/// slot 1; slices 3-4 want `LoadLocal` at slot 1; slice 5 wants
772/// a 4-slot window with `IntEq + JumpIfNot` terminator. Only this
773/// slice matches a `GetField` at slot 1.
774///
775/// Order: must run after slice 4 (so the disjointness analysis
776/// holds — slice 3/4 patterns with a trailing IntAdd / IntSub /
777/// IntMul never carry a GetField at slot 1 and don't compete);
778/// must run before / independent of slice 5/6, which don't match
779/// any slot in this window.
780///
781/// Safety: trailing two slots (the original `GetField` and the
782/// arithmetic op) must not be jump targets. The first slot can be.
783fn apply_peephole_slice7(code: &mut [Op]) {
784    if code.len() < 3 { return; }
785    let jump_targets = collect_jump_targets(code);
786
787    let n = code.len();
788    let mut k = 0;
789    while k + 2 < n {
790        if let (Op::LoadLocal(local_idx), Op::GetField { name_idx, site_idx })
791            = (code[k], code[k + 1])
792        {
793            let fused = match code[k + 2] {
794                Op::IntAdd => Some(Op::LoadLocalGetFieldAdd { local_idx, name_idx, site_idx }),
795                Op::IntSub => Some(Op::LoadLocalGetFieldSub { local_idx, name_idx, site_idx }),
796                Op::IntMul => Some(Op::LoadLocalGetFieldMul { local_idx, name_idx, site_idx }),
797                _ => None,
798            };
799            if let Some(op) = fused {
800                let safe = !jump_targets.contains(&(k + 1))
801                    && !jump_targets.contains(&(k + 2));
802                if safe {
803                    code[k] = op;
804                    k += 3;
805                    continue;
806                }
807            }
808        }
809        k += 1;
810    }
811}
812
813/// Slice 9: fuse the bare `[LoadLocal(local_idx), GetField{name_idx,
814/// site_idx}]` pair into `LoadLocalGetField { local_idx, name_idx,
815/// site_idx }` — the plain `record.field` read, the most common
816/// field-access shape.
817///
818/// The win is allocation, not just one fewer dispatch: the unfused
819/// pair clones the entire record onto the value stack (a
820/// `Box<IndexMap>` for a heap record) only to read one field; the
821/// fused op reads the field out of the local by reference and clones
822/// only that value. On `response_build` the whole-record clone of the
823/// returned `Response` (`r.total`) was the dominant malloc source.
824///
825/// Order: MUST run after slice 7/8. Those fuse `[LoadLocal, GetField,
826/// IntAdd|IntSub|IntMul]`; if slice 9 ran first it would consume the
827/// `LoadLocal + GetField` prefix and block the 3-op fusion. After
828/// slice 7/8, the only remaining `[LoadLocal, GetField]` pairs are
829/// the ones they didn't want (chain heads, standalone reads, field
830/// reads feeding other ops). Slice 7/8's tombstone GetFields sit
831/// after their fused op, never after a bare `LoadLocal`, so slice 9
832/// won't touch them.
833///
834/// Safety: the trailing slot (the original `GetField`) must not be a
835/// jump target. The first slot can be.
836fn apply_peephole_slice9(code: &mut [Op]) {
837    if code.len() < 2 { return; }
838    let jump_targets = collect_jump_targets(code);
839
840    let n = code.len();
841    let mut k = 0;
842    while k + 1 < n {
843        if let (Op::LoadLocal(local_idx), Op::GetField { name_idx, site_idx })
844            = (code[k], code[k + 1])
845        {
846            if !jump_targets.contains(&(k + 1)) {
847                code[k] = Op::LoadLocalGetField { local_idx, name_idx, site_idx };
848                k += 2;
849                continue;
850            }
851        }
852        k += 1;
853    }
854}
855
856fn collect_jump_targets(code: &[Op]) -> std::collections::HashSet<usize> {
857    let mut targets = std::collections::HashSet::new();
858    for (pc, op) in code.iter().enumerate() {
859        let off = match op {
860            Op::Jump(off) | Op::JumpIf(off) | Op::JumpIfNot(off) => Some(*off),
861            _ => None,
862        };
863        if let Some(off) = off {
864            let target = (pc as i32 + 1 + off) as usize;
865            targets.insert(target);
866        }
867    }
868    targets
869}
870
871#[derive(Debug, Clone)]
872struct PendingLambda {
873    fn_id: u32,
874    /// Names of captured outer-scope locals, in order.
875    capture_names: Vec<String>,
876    params: Vec<a::Param>,
877    body: a::CExpr,
878}
879
880struct FnCompiler<'a> {
881    code: Vec<Op>,
882    locals: IndexMap<String, u16>,
883    next_local: u16,
884    /// Peak local usage seen during compilation (for VM frame sizing).
885    peak_local: u16,
886    /// Inferred numeric type of each local for typed numeric-op
887    /// lowering (#461). Populated when binding function parameters
888    /// (from their declared `TypeExpr::Named { name: "Int", .. }`
889    /// or `"Float"`) and when binding `let name := value` where
890    /// the RHS classifies statically. Used by `compile_binop` to
891    /// emit `Op::IntAdd` / `Op::FloatAdd` instead of the
892    /// polymorphic `Op::NumAdd` when both operands' types are
893    /// statically known. Conservative: falls back to `NumTy::Unknown`
894    /// (and the polymorphic op) whenever a type isn't locally
895    /// derivable.
896    ///
897    /// Keyed by local *name* (parallel to `locals`) rather than by
898    /// slot index so shadowed bindings are handled correctly via
899    /// `IndexMap`'s insertion-order semantics.
900    local_types: IndexMap<String, NumTy>,
901    /// Per-local map of statically-known field types (#461 slice 7).
902    /// Populated when a local is bound from a `RecordLit` whose
903    /// fields all classify to non-`Unknown` `NumTy`s. Lets
904    /// `classify_expr(FieldAccess { value: Var(name), field })`
905    /// return a precise `NumTy` instead of falling back to
906    /// `Unknown` — which in turn unlocks the typed-Add lowering
907    /// (`+` over two Ints → `IntAdd`) on `r.field + r.field`
908    /// chains, which slice 7 then fuses into
909    /// `LoadLocalGetFieldAdd`.
910    ///
911    /// Only the literal-binding case is tracked here; annotated
912    /// `let r :: R := ...` would require resolving the type alias
913    /// `R` to its field-type map, which the compiler doesn't yet
914    /// have. Future slice work.
915    local_record_field_types: IndexMap<String, IndexMap<String, NumTy>>,
916    /// Per-function counter for `Op::GetField` site indices (#462
917    /// slice 1). Each `Op::GetField` emit allocates the next index
918    /// here, giving every field-access site within this function a
919    /// stable identifier independent of pc. The VM uses
920    /// `(fn_id, site_idx)` as the inline-cache key, so the cache
921    /// survives the future dispatch rewrite (#461) and a JIT (#465).
922    field_get_sites: u32,
923    pool: &'a mut ConstPool,
924    function_names: &'a IndexMap<String, u32>,
925    module_aliases: &'a IndexMap<String, String>,
926    /// CExpr address → NodeId, populated per stage via `lex_ast::expr_ids`.
927    id_map: &'a std::collections::HashMap<*const a::CExpr, lex_ast::NodeId>,
928    /// Queue of lambdas discovered during compilation; each gets a fresh
929    /// fn_id and is compiled in a later pass.
930    pending_lambdas: &'a mut Vec<PendingLambda>,
931    /// Mutable view of the function table — used to allocate fn_ids for
932    /// freshly-discovered lambdas.
933    next_fn_id: &'a mut Vec<Function>,
934}
935
936/// Lightweight numeric-type classification used by `compile_binop`
937/// to decide whether to emit `IntAdd` / `FloatAdd` (specialized,
938/// fast) or `NumAdd` (polymorphic, runtime-typed dispatch). #461
939/// typed-lowering pass — conservative: anything not provably one
940/// of these returns `Unknown` and falls back to the polymorphic op.
941#[derive(Debug, Clone, Copy, PartialEq, Eq)]
942enum NumTy { Int, Float, Unknown }
943
944/// #461 slice 7: extract a `field_name -> NumTy` map from a record
945/// type expression. Resolves named types (`r :: R`) via
946/// `type_aliases` and returns `None` if the type ultimately isn't
947/// a record literal.
948fn record_field_types(
949    ty: &a::TypeExpr,
950    type_aliases: &IndexMap<String, a::TypeExpr>,
951) -> Option<IndexMap<String, NumTy>> {
952    match ty {
953        a::TypeExpr::Record { fields } => {
954            let mut m = IndexMap::new();
955            for f in fields {
956                m.insert(f.name.clone(), classify_type_expr(&f.ty));
957            }
958            Some(m)
959        }
960        a::TypeExpr::Refined { base, .. } => record_field_types(base, type_aliases),
961        a::TypeExpr::Named { name, args } if args.is_empty() => {
962            // Resolve the alias and recurse. Cycle protection isn't
963            // needed here — a cyclic type alias would have been
964            // rejected by `lex-types::check_program` upstream.
965            type_aliases.get(name).and_then(|t| record_field_types(t, type_aliases))
966        }
967        _ => None,
968    }
969}
970
971fn classify_type_expr(ty: &a::TypeExpr) -> NumTy {
972    match ty {
973        a::TypeExpr::Named { name, args } if args.is_empty() => match name.as_str() {
974            "Int" => NumTy::Int,
975            "Float" => NumTy::Float,
976            _ => NumTy::Unknown,
977        },
978        // `Refined { base, .. }` (#209) — classify by the base type;
979        // the refinement predicate doesn't change the value's primitive shape.
980        a::TypeExpr::Refined { base, .. } => classify_type_expr(base),
981        _ => NumTy::Unknown,
982    }
983}
984
985impl<'a> FnCompiler<'a> {
986    fn alloc_local(&mut self, name: &str) -> u16 {
987        let i = self.next_local;
988        self.locals.insert(name.into(), i);
989        self.next_local += 1;
990        if self.next_local > self.peak_local { self.peak_local = self.next_local; }
991        i
992    }
993    fn emit(&mut self, op: Op) { self.code.push(op); }
994
995    fn compile_expr(&mut self, e: &a::CExpr, tail: bool) {
996        match e {
997            a::CExpr::Literal { value } => self.compile_lit(value),
998            a::CExpr::Var { name } => {
999                if let Some(slot) = self.locals.get(name) {
1000                    self.emit(Op::LoadLocal(*slot));
1001                } else if let Some(&fn_id) = self.function_names.get(name) {
1002                    // Function name used as a *value* (e.g. as a record-field
1003                    // initializer or fold-callback arg) — materialize it as a
1004                    // closure with no captures. The runtime already accepts
1005                    // `Value::Closure { fn_id, captures: vec![] }` and
1006                    // `CallClosure` dispatches it. (#169)
1007                    self.emit(Op::MakeClosure { fn_id, capture_count: 0 });
1008                } else {
1009                    // Should be caught at type-check time; the type checker
1010                    // walks every Var. If we land here it's a compiler bug,
1011                    // not a user typo.
1012                    panic!("unknown var in compiler: {name}");
1013                }
1014            }
1015            a::CExpr::Let { name, ty, value, body } => {
1016                // Classify the RHS for typed-op lowering (#461). Prefer
1017                // the declared annotation when present (cheap O(1)
1018                // lookup); fall back to classifying the value
1019                // expression structurally.
1020                let nty = match ty {
1021                    Some(t) => classify_type_expr(t),
1022                    None => self.classify_expr(value),
1023                };
1024                // #461 slice 7: when the RHS is a record literal,
1025                // remember the field types so `name.field` accesses
1026                // downstream can classify precisely. Without this,
1027                // `r.x + r.y` falls through to `NumAdd`, blocking
1028                // the slice-7 fusion.
1029                if let a::CExpr::RecordLit { fields } = value.as_ref() {
1030                    let mut ftypes = IndexMap::new();
1031                    for f in fields {
1032                        let fty = self.classify_expr(&f.value);
1033                        ftypes.insert(f.name.clone(), fty);
1034                    }
1035                    self.local_record_field_types.insert(name.clone(), ftypes);
1036                }
1037                self.compile_expr(value, false);
1038                let slot = self.alloc_local(name);
1039                self.local_types.insert(name.clone(), nty);
1040                self.emit(Op::StoreLocal(slot));
1041                self.compile_expr(body, tail);
1042            }
1043            a::CExpr::Block { statements, result } => {
1044                for s in statements {
1045                    self.compile_expr(s, false);
1046                    self.emit(Op::Pop);
1047                }
1048                self.compile_expr(result, tail);
1049            }
1050            a::CExpr::Call { callee, args } => self.compile_call(e, callee, args, tail),
1051            a::CExpr::Constructor { name, args } => {
1052                for a in args { self.compile_expr(a, false); }
1053                let name_idx = self.pool.variant(name);
1054                self.emit(Op::MakeVariant { name_idx, arity: args.len() as u16 });
1055            }
1056            a::CExpr::Match { scrutinee, arms } => self.compile_match(scrutinee, arms, tail),
1057            a::CExpr::RecordLit { fields } => {
1058                let mut idxs = Vec::with_capacity(fields.len());
1059                for f in fields {
1060                    self.compile_expr(&f.value, false);
1061                    idxs.push(self.pool.field(&f.name));
1062                }
1063                let field_count = idxs.len() as u16;
1064                let shape_idx = self.pool.record_shape(idxs);
1065                self.emit(Op::MakeRecord { shape_idx, field_count });
1066            }
1067            a::CExpr::TupleLit { items } => {
1068                for it in items { self.compile_expr(it, false); }
1069                self.emit(Op::MakeTuple(items.len() as u16));
1070            }
1071            a::CExpr::ListLit { items } => {
1072                for it in items { self.compile_expr(it, false); }
1073                self.emit(Op::MakeList(items.len() as u32));
1074            }
1075            a::CExpr::FieldAccess { value, field } => {
1076                self.compile_expr(value, false);
1077                let name_idx = self.pool.field(field);
1078                let site_idx = self.field_get_sites;
1079                self.field_get_sites += 1;
1080                self.emit(Op::GetField { name_idx, site_idx });
1081            }
1082            a::CExpr::BinOp { op, lhs, rhs } => self.compile_binop(op, lhs, rhs),
1083            a::CExpr::UnaryOp { op, expr } => {
1084                self.compile_expr(expr, false);
1085                match op.as_str() {
1086                    "-" => self.emit(Op::NumNeg),
1087                    "not" => self.emit(Op::BoolNot),
1088                    other => panic!("unknown unary: {other}"),
1089                }
1090            }
1091            a::CExpr::Lambda { params, body, .. } => self.compile_lambda(params, body),
1092            a::CExpr::Return { value } => {
1093                self.compile_expr(value, true);
1094                self.emit(Op::Return);
1095            }
1096        }
1097    }
1098
1099    fn compile_lit(&mut self, l: &a::CLit) {
1100        let i = match l {
1101            a::CLit::Int { value } => self.pool.int(*value),
1102            a::CLit::Bool { value } => self.pool.bool(*value),
1103            a::CLit::Float { value } => {
1104                let f: f64 = value.parse().unwrap_or(0.0);
1105                self.pool.float(f)
1106            }
1107            a::CLit::Str { value } => self.pool.str(value),
1108            a::CLit::Bytes { value: _ } => {
1109                // Stub: M4 doesn't use bytes literals in §3.13 examples.
1110                let i = self.pool.pool.len() as u32;
1111                self.pool.pool.push(Const::Bytes(Vec::new()));
1112                i
1113            }
1114            a::CLit::Unit => self.pool.unit(),
1115        };
1116        self.emit(Op::PushConst(i));
1117    }
1118
1119    fn compile_call(&mut self, call_expr: &a::CExpr, callee: &a::CExpr, args: &[a::CExpr], tail: bool) {
1120        let node_id = self
1121            .id_map
1122            .get(&(call_expr as *const a::CExpr))
1123            .map(|n| n.as_str().to_string())
1124            .unwrap_or_else(|| "n_?".into());
1125        let node_id_idx = self.pool.node_id(&node_id);
1126
1127        // Module function call: `alias.op(args)` where `alias` is an imported
1128        // module ⇒ EffectCall, except for higher-order pure ops where we
1129        // emit inline bytecode using CallClosure (the closure-arg can't be
1130        // serialized through the effect handler).
1131        if let a::CExpr::FieldAccess { value, field } = callee {
1132            if let a::CExpr::Var { name } = value.as_ref() {
1133                if let Some(module) = self.module_aliases.get(name) {
1134                    if self.try_emit_higher_order(module, field, args, node_id_idx) {
1135                        let _ = tail;
1136                        return;
1137                    }
1138                    for a in args { self.compile_expr(a, false); }
1139                    let kind_idx = self.pool.str(module);
1140                    let op_idx = self.pool.str(field);
1141                    self.emit(Op::EffectCall {
1142                        kind_idx,
1143                        op_idx,
1144                        arity: args.len() as u16,
1145                        node_id_idx,
1146                    });
1147                    let _ = tail;
1148                    return;
1149                }
1150            }
1151        }
1152        match callee {
1153            a::CExpr::Var { name } if self.function_names.contains_key(name) => {
1154                for a in args { self.compile_expr(a, false); }
1155                let fn_id = self.function_names[name];
1156                if tail {
1157                    self.emit(Op::TailCall { fn_id, arity: args.len() as u16, node_id_idx });
1158                } else {
1159                    self.emit(Op::Call { fn_id, arity: args.len() as u16, node_id_idx });
1160                }
1161            }
1162            a::CExpr::Var { name } if self.locals.contains_key(name) => {
1163                // First-class function value bound to a local. Push the
1164                // closure, then args, then CallClosure.
1165                let slot = self.locals[name];
1166                self.emit(Op::LoadLocal(slot));
1167                for a in args { self.compile_expr(a, false); }
1168                self.emit(Op::CallClosure { arity: args.len() as u16, node_id_idx });
1169            }
1170            // Lambda directly applied — push closure + args + CallClosure.
1171            other => {
1172                self.compile_expr(other, false);
1173                for a in args { self.compile_expr(a, false); }
1174                self.emit(Op::CallClosure { arity: args.len() as u16, node_id_idx });
1175            }
1176        }
1177    }
1178
1179    fn compile_binop(&mut self, op: &str, lhs: &a::CExpr, rhs: &a::CExpr) {
1180        // #461 typed lowering: if we can statically prove both
1181        // operands are the same numeric type, emit the typed
1182        // primitive (`IntAdd` / `FloatAdd`) instead of the
1183        // polymorphic `NumAdd` that runtime-matches on operand
1184        // shape. The fast path skips one match per arithmetic op
1185        // *and* unblocks downstream peephole fusions (slice 1)
1186        // that scan for typed primitives. Conservative fallback
1187        // to the polymorphic op when either side classifies as
1188        // `Unknown`, so correctness for `Float` / mixed code is
1189        // unchanged.
1190        let lhs_ty = self.classify_expr(lhs);
1191        let rhs_ty = self.classify_expr(rhs);
1192        let typed = match (lhs_ty, rhs_ty) {
1193            (NumTy::Int, NumTy::Int) => NumTy::Int,
1194            (NumTy::Float, NumTy::Float) => NumTy::Float,
1195            _ => NumTy::Unknown,
1196        };
1197        self.compile_expr(lhs, false);
1198        self.compile_expr(rhs, false);
1199        match (op, typed) {
1200            ("+",  NumTy::Int)     => self.emit(Op::IntAdd),
1201            ("+",  NumTy::Float)   => self.emit(Op::FloatAdd),
1202            ("+",  NumTy::Unknown) => self.emit(Op::NumAdd),
1203            ("-",  NumTy::Int)     => self.emit(Op::IntSub),
1204            ("-",  NumTy::Float)   => self.emit(Op::FloatSub),
1205            ("-",  NumTy::Unknown) => self.emit(Op::NumSub),
1206            ("*",  NumTy::Int)     => self.emit(Op::IntMul),
1207            ("*",  NumTy::Float)   => self.emit(Op::FloatMul),
1208            ("*",  NumTy::Unknown) => self.emit(Op::NumMul),
1209            ("/",  NumTy::Int)     => self.emit(Op::IntDiv),
1210            ("/",  NumTy::Float)   => self.emit(Op::FloatDiv),
1211            ("/",  NumTy::Unknown) => self.emit(Op::NumDiv),
1212            // Int has %; Float doesn't (NumMod will reject at runtime).
1213            ("%",  NumTy::Int)     => self.emit(Op::IntMod),
1214            ("%",  _)              => self.emit(Op::NumMod),
1215            ("==", NumTy::Int)     => self.emit(Op::IntEq),
1216            ("==", NumTy::Float)   => self.emit(Op::FloatEq),
1217            ("==", NumTy::Unknown) => self.emit(Op::NumEq),
1218            ("!=", NumTy::Int)     => { self.emit(Op::IntEq);   self.emit(Op::BoolNot); }
1219            ("!=", NumTy::Float)   => { self.emit(Op::FloatEq); self.emit(Op::BoolNot); }
1220            ("!=", NumTy::Unknown) => { self.emit(Op::NumEq);   self.emit(Op::BoolNot); }
1221            ("<",  NumTy::Int)     => self.emit(Op::IntLt),
1222            ("<",  NumTy::Float)   => self.emit(Op::FloatLt),
1223            ("<",  NumTy::Unknown) => self.emit(Op::NumLt),
1224            ("<=", NumTy::Int)     => self.emit(Op::IntLe),
1225            ("<=", NumTy::Float)   => self.emit(Op::FloatLe),
1226            ("<=", NumTy::Unknown) => self.emit(Op::NumLe),
1227            (">",  NumTy::Int)     => { self.emit_swap_top2(); self.emit(Op::IntLt); }
1228            (">",  NumTy::Float)   => { self.emit_swap_top2(); self.emit(Op::FloatLt); }
1229            (">",  NumTy::Unknown) => { self.emit_swap_top2(); self.emit(Op::NumLt); }
1230            (">=", NumTy::Int)     => { self.emit_swap_top2(); self.emit(Op::IntLe); }
1231            (">=", NumTy::Float)   => { self.emit_swap_top2(); self.emit(Op::FloatLe); }
1232            (">=", NumTy::Unknown) => { self.emit_swap_top2(); self.emit(Op::NumLe); }
1233            ("and", _) => self.emit(Op::BoolAnd),
1234            ("or",  _) => self.emit(Op::BoolOr),
1235            (other, _) => panic!("unknown binop: {other:?}"),
1236        }
1237    }
1238
1239    /// Classify an expression's static numeric type for #461 typed
1240    /// lowering. Strictly conservative: only returns `Int` / `Float`
1241    /// when the type is locally derivable from a literal, an
1242    /// already-classified local, or a binary op on two same-typed
1243    /// operands. Everything else (function calls, field access,
1244    /// match expressions, ...) falls back to `Unknown` and the
1245    /// polymorphic NumAdd-family op.
1246    fn classify_expr(&self, e: &a::CExpr) -> NumTy {
1247        match e {
1248            a::CExpr::Literal { value: a::CLit::Int { .. } } => NumTy::Int,
1249            a::CExpr::Literal { value: a::CLit::Float { .. } } => NumTy::Float,
1250            a::CExpr::Var { name } =>
1251                self.local_types.get(name).copied().unwrap_or(NumTy::Unknown),
1252            a::CExpr::BinOp { op, lhs, rhs } => {
1253                // Numeric ops preserve the operand type (Int+Int=Int,
1254                // Float+Float=Float). Comparison/logical ops yield
1255                // Bool, not a numeric type — return Unknown.
1256                let is_numeric = matches!(op.as_str(), "+" | "-" | "*" | "/" | "%");
1257                if !is_numeric { return NumTy::Unknown; }
1258                match (self.classify_expr(lhs), self.classify_expr(rhs)) {
1259                    (NumTy::Int, NumTy::Int) => NumTy::Int,
1260                    (NumTy::Float, NumTy::Float) => NumTy::Float,
1261                    _ => NumTy::Unknown,
1262                }
1263            }
1264            a::CExpr::UnaryOp { op, expr } if op == "-" => self.classify_expr(expr),
1265            // #461 slice 7: `r.field` access where `r` is a local
1266            // bound from a record literal. Reads the per-local
1267            // field-type map populated at the let-binding site.
1268            // Unknown otherwise (record argument with `:: R`
1269            // annotation, helper-returned record, etc.) — those
1270            // would need type-alias resolution to classify.
1271            a::CExpr::FieldAccess { value, field } => {
1272                if let a::CExpr::Var { name } = value.as_ref() {
1273                    if let Some(ftypes) = self.local_record_field_types.get(name) {
1274                        return ftypes.get(field).copied().unwrap_or(NumTy::Unknown);
1275                    }
1276                }
1277                NumTy::Unknown
1278            }
1279            // Let-expressions: the let-binding mutates `local_types`
1280            // *during* compile_expr; classifying ahead of time would
1281            // require simulating that. Conservative fallback.
1282            _ => NumTy::Unknown,
1283        }
1284    }
1285
1286    fn emit_swap_top2(&mut self) {
1287        let a = self.alloc_local("__swap_a");
1288        let b = self.alloc_local("__swap_b");
1289        self.emit(Op::StoreLocal(b));
1290        self.emit(Op::StoreLocal(a));
1291        self.emit(Op::LoadLocal(b));
1292        self.emit(Op::LoadLocal(a));
1293    }
1294
1295    fn compile_match(&mut self, scrutinee: &a::CExpr, arms: &[a::Arm], tail: bool) {
1296        self.compile_expr(scrutinee, false);
1297        let scrut_slot = self.alloc_local("__scrut");
1298        self.emit(Op::StoreLocal(scrut_slot));
1299
1300        let mut end_jumps: Vec<usize> = Vec::new();
1301        for arm in arms {
1302            let arm_start_locals = self.next_local;
1303            let arm_start_locals_map = self.locals.clone();
1304
1305            self.emit(Op::LoadLocal(scrut_slot));
1306            let mut bindings: Vec<(String, u16)> = Vec::new();
1307            let fail_jumps: Vec<usize> = self.compile_pattern_test(&arm.pattern, &mut bindings);
1308
1309            self.compile_expr(&arm.body, tail);
1310            let j_end = self.code.len();
1311            self.emit(Op::Jump(0));
1312            end_jumps.push(j_end);
1313
1314            let fail_target = self.code.len() as i32;
1315            for j in fail_jumps {
1316                // #337: PConstructor patterns now register an
1317                // unconditional `Op::Jump` for the failure path
1318                // (alongside the existing `Op::JumpIfNot` from
1319                // PLiteral / nested constructor tests). Patch
1320                // either shape.
1321                match &mut self.code[j] {
1322                    Op::JumpIfNot(off) => *off = fail_target - (j as i32 + 1),
1323                    Op::Jump(off)      => *off = fail_target - (j as i32 + 1),
1324                    _ => {}
1325                }
1326            }
1327            self.next_local = arm_start_locals;
1328            self.locals = arm_start_locals_map;
1329        }
1330        let panic_msg_idx = self.pool.str("non-exhaustive match");
1331        self.emit(Op::Panic(panic_msg_idx));
1332
1333        let end_target = self.code.len() as i32;
1334        for j in end_jumps {
1335            if let Op::Jump(off) = &mut self.code[j] {
1336                *off = end_target - (j as i32 + 1);
1337            }
1338        }
1339    }
1340
1341    fn compile_pattern_test(&mut self, p: &a::Pattern, bindings: &mut Vec<(String, u16)>) -> Vec<usize> {
1342        let mut fails = Vec::new();
1343        match p {
1344            a::Pattern::PWild => { self.emit(Op::Pop); }
1345            a::Pattern::PVar { name } => {
1346                let slot = self.alloc_local(name);
1347                self.emit(Op::StoreLocal(slot));
1348                bindings.push((name.clone(), slot));
1349            }
1350            a::Pattern::PLiteral { value } => {
1351                self.compile_lit(value);
1352                match value {
1353                    a::CLit::Str { .. } => self.emit(Op::StrEq),
1354                    a::CLit::Bytes { .. } => self.emit(Op::BytesEq),
1355                    // Typed-lowering for numeric literal patterns
1356                    // (#461 slice 5 prerequisite). The pattern only
1357                    // reaches its test when the scrutinee has the
1358                    // literal's type (the type checker rejects
1359                    // mismatches), so emit the type-specific Eq.
1360                    // The body_hash decoder lowers IntEq/FloatEq to
1361                    // NumEq at hash time so closure identity (#222)
1362                    // is unchanged. Enables slice 5's
1363                    // `LoadLocal + PushConst + IntEq + JumpIfNot`
1364                    // peephole to fire on pattern-match arm tests.
1365                    a::CLit::Int { .. } => self.emit(Op::IntEq),
1366                    a::CLit::Float { .. } => self.emit(Op::FloatEq),
1367                    _ => self.emit(Op::NumEq),
1368                }
1369                let j = self.code.len();
1370                self.emit(Op::JumpIfNot(0));
1371                fails.push(j);
1372            }
1373            a::Pattern::PConstructor { name, args } => {
1374                let name_idx = self.pool.variant(name);
1375                // #337: the failure path must drop the duplicated
1376                // scrutinee so subsequent match arms see a clean
1377                // stack. The previous shape
1378                //   Dup; TestVariant; JumpIfNot(fail);
1379                // left `[scrut]` on the stack at the fail target,
1380                // poisoning later arms — e.g. a wildcard `_` arm
1381                // whose body referenced an unrelated value would
1382                // pop the leaked scrutinee instead of its own value.
1383                //
1384                // New shape: branch on success, fall through to a
1385                // failure cleanup that pops the dup'd scrutinee
1386                // before jumping. The registered fail-jump is an
1387                // unconditional `Op::Jump`; `compile_match`'s patch
1388                // loop accepts both `JumpIfNot` and `Jump`.
1389                self.emit(Op::Dup);                   // [scrut, scrut]
1390                self.emit(Op::TestVariant(name_idx)); // [scrut, Bool]
1391                let j_success = self.code.len();
1392                self.emit(Op::JumpIf(0));             // pop Bool. success → [scrut]
1393                self.emit(Op::Pop);                   // failure cleanup: [scrut] → []
1394                let j_fail = self.code.len();
1395                self.emit(Op::Jump(0));               // → fail target with []
1396                fails.push(j_fail);
1397                let success_target = self.code.len() as i32;
1398                if let Op::JumpIf(off) = &mut self.code[j_success] {
1399                    *off = success_target - (j_success as i32 + 1);
1400                }
1401                if args.is_empty() {
1402                    self.emit(Op::Pop);
1403                } else if args.len() == 1 {
1404                    self.emit(Op::GetVariantArg(0));
1405                    let sub_fails = self.compile_pattern_test(&args[0], bindings);
1406                    fails.extend(sub_fails);
1407                } else {
1408                    let slot = self.alloc_local("__variant");
1409                    self.emit(Op::StoreLocal(slot));
1410                    for (i, arg) in args.iter().enumerate() {
1411                        self.emit(Op::LoadLocal(slot));
1412                        self.emit(Op::GetVariantArg(i as u16));
1413                        let sub_fails = self.compile_pattern_test(arg, bindings);
1414                        fails.extend(sub_fails);
1415                    }
1416                }
1417            }
1418            a::Pattern::PRecord { fields } => {
1419                let slot = self.alloc_local("__record");
1420                self.emit(Op::StoreLocal(slot));
1421                for f in fields {
1422                    self.emit(Op::LoadLocal(slot));
1423                    let name_idx = self.pool.field(&f.name);
1424                    let site_idx = self.field_get_sites;
1425                    self.field_get_sites += 1;
1426                    self.emit(Op::GetField { name_idx, site_idx });
1427                    let sub_fails = self.compile_pattern_test(&f.pattern, bindings);
1428                    fails.extend(sub_fails);
1429                }
1430            }
1431            a::Pattern::PTuple { items } => {
1432                let slot = self.alloc_local("__tuple");
1433                self.emit(Op::StoreLocal(slot));
1434                for (i, item) in items.iter().enumerate() {
1435                    self.emit(Op::LoadLocal(slot));
1436                    self.emit(Op::GetElem(i as u16));
1437                    let sub_fails = self.compile_pattern_test(item, bindings);
1438                    fails.extend(sub_fails);
1439                }
1440            }
1441        }
1442        fails
1443    }
1444
1445    /// Compile a Lambda: collect free variables that resolve to outer-scope
1446    /// locals, register a synthetic function, emit MakeClosure with the
1447    /// captured values pushed in order.
1448    fn compile_lambda(&mut self, params: &[a::Param], body: &a::CExpr) {
1449        // Free vars = vars referenced in body that aren't bound locally.
1450        let mut bound: std::collections::HashSet<String> = params.iter().map(|p| p.name.clone()).collect();
1451        let mut frees: Vec<String> = Vec::new();
1452        free_vars(body, &mut bound, &mut frees);
1453
1454        // Filter to those that are in the enclosing locals (captures).
1455        // Don't exclude names that *also* exist in `function_names`:
1456        // if the name is in `locals`, the local shadows the global
1457        // within this scope, and the lambda needs to capture the
1458        // local's value, not the global fn. (#339) Names that are
1459        // ONLY in `function_names` (no local) stay external — the
1460        // lambda's body resolves them at call time, same as the
1461        // enclosing fn would.
1462        let captures: Vec<String> = frees.into_iter()
1463            .filter(|n| self.locals.contains_key(n))
1464            .collect();
1465
1466        // Allocate a fresh fn_id by appending a placeholder Function.
1467        let fn_id = self.next_fn_id.len() as u32;
1468        self.next_fn_id.push(Function {
1469            name: format!("__lambda_{fn_id}"),
1470            arity: (captures.len() + params.len()) as u16,
1471            locals_count: 0,
1472            code: Vec::new(),
1473            effects: Vec::new(),
1474            // See #222: filled in at the end of the compile pass.
1475            body_hash: crate::program::ZERO_BODY_HASH,
1476            // Lambdas don't carry refinements at the surface today
1477            // (closure params don't accept `Type{x | ...}` syntax in
1478            // the parser). #209 stays focused on top-level fn decls;
1479            // closure-param refinements are a follow-up.
1480            refinements: Vec::new(),
1481            // Lambda body hasn't been compiled yet; filled in by the
1482            // deferred lambda-compile pass after FnCompiler walks it.
1483            field_ic_sites: 0,
1484        });
1485
1486        // Emit code at the lambda site: load each captured local, then MakeClosure.
1487        for c in &captures {
1488            let slot = *self.locals.get(c).expect("free var must be in scope");
1489            self.emit(Op::LoadLocal(slot));
1490        }
1491        self.emit(Op::MakeClosure { fn_id, capture_count: captures.len() as u16 });
1492
1493        // Queue the body for later compilation.
1494        self.pending_lambdas.push(PendingLambda {
1495            fn_id,
1496            capture_names: captures,
1497            params: params.to_vec(),
1498            body: body.clone(),
1499        });
1500    }
1501
1502    /// Higher-order stdlib ops on Result/Option whose function arg is a
1503    /// closure. Emit inline: pattern-match on the variant, invoke the
1504    /// closure when applicable, return wrapped result.
1505    fn try_emit_higher_order(
1506        &mut self,
1507        module: &str,
1508        op: &str,
1509        args: &[a::CExpr],
1510        node_id_idx: u32,
1511    ) -> bool {
1512        match (module, op) {
1513            ("result", "map") => self.emit_variant_map(args, "Ok", true),
1514            ("result", "and_then") => self.emit_variant_map(args, "Ok", false),
1515            ("result", "map_err") => self.emit_variant_map(args, "Err", true),
1516            ("result", "or_else") => self.emit_variant_or_else(args, "Err", 1),
1517            ("option", "map") => self.emit_variant_map(args, "Some", true),
1518            ("option", "and_then") => self.emit_variant_map(args, "Some", false),
1519            ("option", "or_else") => self.emit_variant_or_else(args, "None", 0),
1520            ("option", "unwrap_or_else") => self.emit_option_unwrap_or_else(args),
1521            ("list", "map") => self.emit_list_map(args),
1522            ("list", "par_map") => self.emit_list_par_map(args),
1523            ("list", "sort_by") => self.emit_list_sort_by(args),
1524            ("list", "filter") => self.emit_list_filter(args),
1525            ("list", "fold") => self.emit_list_fold(args),
1526            ("iter", "from_list") => self.emit_iter_from_list(args),
1527            ("iter", "unfold")    => self.emit_iter_unfold(args),
1528            ("iter", "next")      => self.emit_iter_next(args),
1529            ("iter", "is_empty")  => self.emit_iter_is_empty(args),
1530            ("iter", "count")     => self.emit_iter_count(args),
1531            ("iter", "take")      => self.emit_iter_take(args),
1532            ("iter", "skip")      => self.emit_iter_skip(args),
1533            ("iter", "to_list")   => self.emit_iter_to_list(args),
1534            ("iter", "collect")   => self.emit_iter_to_list(args),
1535            ("iter", "map")       => self.emit_iter_map(args),
1536            ("iter", "filter")    => self.emit_iter_filter(args),
1537            ("iter", "fold")      => self.emit_iter_fold(args),
1538            ("map", "fold") => self.emit_map_fold(args, node_id_idx),
1539            ("flow", "sequential") => self.emit_flow_sequential(args),
1540            ("flow", "branch") => self.emit_flow_branch(args),
1541            ("flow", "retry") => self.emit_flow_retry(args),
1542            ("flow", "retry_with_backoff") => self.emit_flow_retry_with_backoff(args),
1543            ("flow", "parallel") => self.emit_flow_parallel(args),
1544            ("flow", "parallel_list") => self.emit_flow_parallel_list(args),
1545            _ => return false,
1546        }
1547        true
1548    }
1549
1550    /// `list.map(xs, f)` — native map op (#464). Pushes `xs` then `f`
1551    /// and emits a single `Op::ListMap`. The previous inlined loop
1552    /// re-`LoadLocal`'d (cloned) the whole input and accumulator lists
1553    /// each iteration — O(n²); the native op owns the list and builds
1554    /// the result with one pre-sized allocation.
1555    fn emit_list_map(&mut self, args: &[a::CExpr]) {
1556        self.compile_expr(&args[0], false); // xs
1557        self.compile_expr(&args[1], false); // f
1558        let nid = self.pool.node_id("n_list_map");
1559        self.emit(Op::ListMap { node_id_idx: nid });
1560    }
1561
1562    /// `list.par_map(xs, f)` (#305 slice 1). Pushes `xs` and `f`,
1563    /// then emits a single `Op::ParallelMap` — the VM applies `f`
1564    /// to each element on OS-thread tasks, capped by
1565    /// `LEX_PAR_MAX_CONCURRENCY`. Returns the result list in input
1566    /// order.
1567    fn emit_list_par_map(&mut self, args: &[a::CExpr]) {
1568        self.compile_expr(&args[0], false);
1569        self.compile_expr(&args[1], false);
1570        let nid = self.pool.node_id("n_list_par_map");
1571        self.emit(Op::ParallelMap { node_id_idx: nid });
1572    }
1573
1574    /// `list.sort_by(xs, f)` (#338). Pushes `xs` and the key-fn
1575    /// `f`, then emits a single `Op::SortByKey` — the VM invokes
1576    /// `f` on each element to derive a sortable key, stable-sorts
1577    /// by key, and returns the values in sorted order. Keys must
1578    /// resolve to `Int` / `Float` / `Str`; mixed-type pairs are
1579    /// treated as equal by the comparator (preserving insertion
1580    /// order via the stable sort).
1581    fn emit_list_sort_by(&mut self, args: &[a::CExpr]) {
1582        self.compile_expr(&args[0], false);
1583        self.compile_expr(&args[1], false);
1584        let nid = self.pool.node_id("n_list_sort_by");
1585        self.emit(Op::SortByKey { node_id_idx: nid });
1586    }
1587
1588    /// `list.filter(xs, pred)` — native filter op (#464). Same
1589    /// rationale as `emit_list_map`.
1590    fn emit_list_filter(&mut self, args: &[a::CExpr]) {
1591        self.compile_expr(&args[0], false); // xs
1592        self.compile_expr(&args[1], false); // pred
1593        let nid = self.pool.node_id("n_list_filter");
1594        self.emit(Op::ListFilter { node_id_idx: nid });
1595    }
1596
1597    /// `list.fold(xs, init, f)` — native left-fold op (#464). Same
1598    /// rationale as `emit_list_map`. Stack: `[xs, init, f]`.
1599    fn emit_list_fold(&mut self, args: &[a::CExpr]) {
1600        self.compile_expr(&args[0], false); // xs
1601        self.compile_expr(&args[1], false); // init
1602        self.compile_expr(&args[2], false); // f
1603        let nid = self.pool.node_id("n_list_fold");
1604        self.emit(Op::ListFold { node_id_idx: nid });
1605    }
1606
1607    // ── Iter[T] operations (#364) ─────────────────────────────────────────
1608    // Internal representation: `Value::Variant("__IterEager", [list, idx])`
1609    // for the eager form (a List backing store + Int cursor) and
1610    // `Value::Variant("__IterLazy", [seed, step_closure])` for the lazy form
1611    // produced by `iter.unfold` (#376). Both are tagged variants so each op
1612    // can `TestVariant` at runtime to dispatch. The names start with `__` so
1613    // they can't be written by user code (uppercase ASCII-letter is required
1614    // for constructor names, and the underscores keep them out of the
1615    // user-namespace by convention).
1616
1617    /// `iter.from_list(xs)` — wrap a list in an eager iterator at position 0.
1618    fn emit_iter_from_list(&mut self, args: &[a::CExpr]) {
1619        self.compile_expr(&args[0], false);
1620        let zero = self.pool.int(0);
1621        self.emit(Op::PushConst(zero));
1622        let v = self.pool.variant("__IterEager");
1623        self.emit(Op::MakeVariant { name_idx: v, arity: 2 });
1624    }
1625
1626    /// `iter.next(it)` — advance one step; returns `Option[(T, Iter[T])]`.
1627    ///
1628    /// Dispatches on the iter's variant tag:
1629    /// - `__IterLazy(seed, step)` (#376) → invoke `step(seed)`. On
1630    ///   `Some((t, s'))` wrap as `Some((t, __IterLazy(s', step)))`; on
1631    ///   `None` propagate `None`. The seed advances forward each call.
1632    /// - `__IterCursor(handle)` (#379) → effect-call `sql.cursor_next(handle)`
1633    ///   which returns `Option[T]`. On `Some(row)` wrap as
1634    ///   `Some((row, __IterCursor(handle)))`; on `None` propagate. Handle
1635    ///   stays stable across calls — state is server-side / mpsc-buffered.
1636    /// - `__IterEager(list, idx)` → existing positional cursor.
1637    fn emit_iter_next(&mut self, args: &[a::CExpr]) {
1638        self.compile_expr(&args[0], false);
1639        let it = self.alloc_local("__in_it");
1640        self.emit(Op::StoreLocal(it));
1641
1642        // Dispatch: TestVariant pops; we Dup to keep the iter around.
1643        self.emit(Op::LoadLocal(it));
1644        self.emit(Op::Dup);
1645        let lazy_name = self.pool.variant("__IterLazy");
1646        self.emit(Op::TestVariant(lazy_name));
1647        let j_to_check_cursor = self.code.len();
1648        self.emit(Op::JumpIfNot(0));
1649
1650        // ── lazy path ────────────────────────────────────────────────
1651        // The Dup'd iter is on stack but we've consumed it via TestVariant,
1652        // so reload from the local.
1653        self.emit(Op::LoadLocal(it));
1654        self.emit(Op::GetVariantArg(0)); // seed
1655        let seed = self.alloc_local("__in_seed");
1656        self.emit(Op::StoreLocal(seed));
1657
1658        self.emit(Op::LoadLocal(it));
1659        self.emit(Op::GetVariantArg(1)); // step closure
1660        let step = self.alloc_local("__in_step");
1661        self.emit(Op::StoreLocal(step));
1662
1663        // Call step(seed) → Option[(T, S)].
1664        let nid_lazy = self.pool.node_id("n_iter_next_lazy");
1665        self.emit(Op::LoadLocal(step));
1666        self.emit(Op::LoadLocal(seed));
1667        self.emit(Op::CallClosure { arity: 1, node_id_idx: nid_lazy });
1668        let opt = self.alloc_local("__in_opt");
1669        self.emit(Op::StoreLocal(opt));
1670
1671        // If `step` returned None, propagate it directly.
1672        self.emit(Op::LoadLocal(opt));
1673        let some_name = self.pool.variant("Some");
1674        self.emit(Op::TestVariant(some_name));
1675        let j_lazy_none = self.code.len();
1676        self.emit(Op::JumpIfNot(0));
1677
1678        // Some((t, new_seed)) — extract the inner tuple, repackage as
1679        // Some((t, __IterLazy(new_seed, step))) so the next call advances.
1680        self.emit(Op::LoadLocal(opt));
1681        self.emit(Op::GetVariantArg(0));     // (t, new_seed)
1682        let pair = self.alloc_local("__in_pair");
1683        self.emit(Op::StoreLocal(pair));
1684
1685        self.emit(Op::LoadLocal(pair));
1686        self.emit(Op::GetElem(0));           // t
1687        self.emit(Op::LoadLocal(pair));
1688        self.emit(Op::GetElem(1));           // new_seed
1689        self.emit(Op::LoadLocal(step));      // step closure
1690        let lazy_v = self.pool.variant("__IterLazy");
1691        self.emit(Op::MakeVariant { name_idx: lazy_v, arity: 2 }); // __IterLazy(new_seed, step)
1692        self.emit(Op::MakeTuple(2));         // (t, new_iter)
1693        let some_v = self.pool.variant("Some");
1694        self.emit(Op::MakeVariant { name_idx: some_v, arity: 1 });
1695        let j_after_lazy = self.code.len();
1696        self.emit(Op::Jump(0));
1697
1698        // Lazy → None: just forward the None.
1699        let none_t = self.code.len() as i32;
1700        if let Op::JumpIfNot(off) = &mut self.code[j_lazy_none] {
1701            *off = none_t - (j_lazy_none as i32 + 1);
1702        }
1703        let none_v = self.pool.variant("None");
1704        self.emit(Op::MakeVariant { name_idx: none_v, arity: 0 });
1705        let j_after_lazy_none = self.code.len();
1706        self.emit(Op::Jump(0));
1707
1708        // ── cursor path (#379) ───────────────────────────────────────
1709        let cursor_check_t = self.code.len() as i32;
1710        if let Op::JumpIfNot(off) = &mut self.code[j_to_check_cursor] {
1711            *off = cursor_check_t - (j_to_check_cursor as i32 + 1);
1712        }
1713
1714        self.emit(Op::LoadLocal(it));
1715        self.emit(Op::Dup);
1716        let cursor_name = self.pool.variant("__IterCursor");
1717        self.emit(Op::TestVariant(cursor_name));
1718        let j_to_eager = self.code.len();
1719        self.emit(Op::JumpIfNot(0));
1720
1721        // Cursor path: extract handle, effect-call sql.cursor_next(handle).
1722        // The handler returns Option[T] directly. We then wrap as
1723        // Some((T, __IterCursor(handle))) or forward None.
1724        self.emit(Op::LoadLocal(it));
1725        self.emit(Op::GetVariantArg(0));     // handle
1726        let handle = self.alloc_local("__in_handle");
1727        self.emit(Op::StoreLocal(handle));
1728
1729        let kind_idx = self.pool.str("sql");
1730        let op_idx = self.pool.str("cursor_next");
1731        let nid_cursor = self.pool.node_id("n_iter_next_cursor");
1732        self.emit(Op::LoadLocal(handle));
1733        self.emit(Op::EffectCall {
1734            kind_idx,
1735            op_idx,
1736            arity: 1,
1737            node_id_idx: nid_cursor,
1738        });
1739        let cur_opt = self.alloc_local("__in_cur_opt");
1740        self.emit(Op::StoreLocal(cur_opt));
1741
1742        self.emit(Op::LoadLocal(cur_opt));
1743        let some_c = self.pool.variant("Some");
1744        self.emit(Op::TestVariant(some_c));
1745        let j_cursor_none = self.code.len();
1746        self.emit(Op::JumpIfNot(0));
1747
1748        // Some(row): build Some((row, __IterCursor(handle)))
1749        self.emit(Op::LoadLocal(cur_opt));
1750        self.emit(Op::GetVariantArg(0));     // row
1751        self.emit(Op::LoadLocal(handle));
1752        let cursor_v = self.pool.variant("__IterCursor");
1753        self.emit(Op::MakeVariant { name_idx: cursor_v, arity: 1 });
1754        self.emit(Op::MakeTuple(2));         // (row, __IterCursor(handle))
1755        let some_c2 = self.pool.variant("Some");
1756        self.emit(Op::MakeVariant { name_idx: some_c2, arity: 1 });
1757        let j_after_cursor = self.code.len();
1758        self.emit(Op::Jump(0));
1759
1760        // Cursor → None
1761        let cursor_none_t = self.code.len() as i32;
1762        if let Op::JumpIfNot(off) = &mut self.code[j_cursor_none] {
1763            *off = cursor_none_t - (j_cursor_none as i32 + 1);
1764        }
1765        let none_c = self.pool.variant("None");
1766        self.emit(Op::MakeVariant { name_idx: none_c, arity: 0 });
1767        let j_after_cursor_none = self.code.len();
1768        self.emit(Op::Jump(0));
1769
1770        // ── eager path ───────────────────────────────────────────────
1771        let eager_t = self.code.len() as i32;
1772        if let Op::JumpIfNot(off) = &mut self.code[j_to_eager] {
1773            *off = eager_t - (j_to_eager as i32 + 1);
1774        }
1775
1776        self.emit(Op::LoadLocal(it));
1777        self.emit(Op::GetVariantArg(0));
1778        let list = self.alloc_local("__in_list");
1779        self.emit(Op::StoreLocal(list));
1780
1781        self.emit(Op::LoadLocal(it));
1782        self.emit(Op::GetVariantArg(1));
1783        let idx = self.alloc_local("__in_idx");
1784        self.emit(Op::StoreLocal(idx));
1785
1786        // if idx < len(list)
1787        self.emit(Op::LoadLocal(idx));
1788        self.emit(Op::LoadLocal(list));
1789        self.emit(Op::GetListLen);
1790        self.emit(Op::IntLt);
1791        let j_eager_else = self.code.len();
1792        self.emit(Op::JumpIfNot(0));
1793
1794        // Some((item, __IterEager(list, idx+1)))
1795        self.emit(Op::LoadLocal(list));
1796        self.emit(Op::LoadLocal(idx));
1797        self.emit(Op::GetListElemDyn);
1798
1799        self.emit(Op::LoadLocal(list));
1800        self.emit(Op::LoadLocal(idx));
1801        let one = self.pool.int(1);
1802        self.emit(Op::PushConst(one));
1803        self.emit(Op::IntAdd);
1804        let eager_v = self.pool.variant("__IterEager");
1805        self.emit(Op::MakeVariant { name_idx: eager_v, arity: 2 });
1806        self.emit(Op::MakeTuple(2));
1807        let some_e = self.pool.variant("Some");
1808        self.emit(Op::MakeVariant { name_idx: some_e, arity: 1 });
1809        let j_after_eager = self.code.len();
1810        self.emit(Op::Jump(0));
1811
1812        // Eager → None
1813        let eager_none_t = self.code.len() as i32;
1814        if let Op::JumpIfNot(off) = &mut self.code[j_eager_else] {
1815            *off = eager_none_t - (j_eager_else as i32 + 1);
1816        }
1817        let none_e = self.pool.variant("None");
1818        self.emit(Op::MakeVariant { name_idx: none_e, arity: 0 });
1819
1820        // Converge all paths.
1821        let end = self.code.len() as i32;
1822        if let Op::Jump(off) = &mut self.code[j_after_lazy] {
1823            *off = end - (j_after_lazy as i32 + 1);
1824        }
1825        if let Op::Jump(off) = &mut self.code[j_after_lazy_none] {
1826            *off = end - (j_after_lazy_none as i32 + 1);
1827        }
1828        if let Op::Jump(off) = &mut self.code[j_after_cursor] {
1829            *off = end - (j_after_cursor as i32 + 1);
1830        }
1831        if let Op::Jump(off) = &mut self.code[j_after_cursor_none] {
1832            *off = end - (j_after_cursor_none as i32 + 1);
1833        }
1834        if let Op::Jump(off) = &mut self.code[j_after_eager] {
1835            *off = end - (j_after_eager as i32 + 1);
1836        }
1837    }
1838
1839    /// `iter.unfold(seed, step)` — lazy iterator that calls `step(seed)` on
1840    /// each `iter.next` and threads the new seed forward. Internal value
1841    /// shape: `__IterLazy(seed, step)`. Step has type `(S) -> Option[(T, S)]`;
1842    /// returning `None` ends the iteration (#376).
1843    fn emit_iter_unfold(&mut self, args: &[a::CExpr]) {
1844        self.compile_expr(&args[0], false); // seed
1845        self.compile_expr(&args[1], false); // step
1846        let lazy = self.pool.variant("__IterLazy");
1847        self.emit(Op::MakeVariant { name_idx: lazy, arity: 2 });
1848    }
1849
1850    /// `iter.is_empty(it)` — true iff no further element. v1 supports the
1851    /// eager form O(1); on a lazy iter the seed sits in slot 0 and is not a
1852    /// List, so the VM trips on `GetListLen` rather than returning a wrong
1853    /// answer. Callers needing lazy support should materialize with
1854    /// `iter.to_list` first or call `iter.next` and pattern-match.
1855    fn emit_iter_is_empty(&mut self, args: &[a::CExpr]) {
1856        self.compile_expr(&args[0], false);
1857        let it = self.alloc_local("__ie_it");
1858        self.emit(Op::StoreLocal(it));
1859
1860        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(1)); // idx
1861        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(0)); // list
1862        self.emit(Op::GetListLen);                                     // len
1863        self.emit(Op::IntLt);                                          // idx < len
1864        self.emit(Op::BoolNot);                                        // NOT(idx < len)
1865    }
1866
1867    /// `iter.count(it)` — number of remaining elements (v1: eager-only).
1868    fn emit_iter_count(&mut self, args: &[a::CExpr]) {
1869        self.compile_expr(&args[0], false);
1870        let it = self.alloc_local("__ic_it");
1871        self.emit(Op::StoreLocal(it));
1872
1873        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(0));
1874        self.emit(Op::GetListLen);                                     // len
1875        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(1)); // idx
1876        self.emit(Op::IntSub);                                         // len - idx
1877    }
1878
1879    /// `iter.take(it, n)` — collect up to n elements, return as new Iter.
1880    fn emit_iter_take(&mut self, args: &[a::CExpr]) {
1881        self.compile_expr(&args[0], false);
1882        let it   = self.alloc_local("__itk_it");
1883        self.emit(Op::StoreLocal(it));
1884
1885        self.compile_expr(&args[1], false);
1886        let n    = self.alloc_local("__itk_n");
1887        self.emit(Op::StoreLocal(n));
1888
1889        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(0));
1890        let list = self.alloc_local("__itk_list");
1891        self.emit(Op::StoreLocal(list));
1892
1893        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(1));
1894        let i    = self.alloc_local("__itk_i");
1895        self.emit(Op::StoreLocal(i));
1896
1897        self.emit(Op::MakeList(0));
1898        let out  = self.alloc_local("__itk_out");
1899        self.emit(Op::StoreLocal(out));
1900
1901        let zero = self.pool.int(0);
1902        self.emit(Op::PushConst(zero));
1903        let cnt  = self.alloc_local("__itk_cnt");
1904        self.emit(Op::StoreLocal(cnt));
1905
1906        let loop_top = self.code.len();
1907
1908        // while cnt < n
1909        self.emit(Op::LoadLocal(cnt));
1910        self.emit(Op::LoadLocal(n));
1911        self.emit(Op::IntLt);
1912        let j_exit_n = self.code.len();
1913        self.emit(Op::JumpIfNot(0));
1914
1915        // AND i < len(list)
1916        self.emit(Op::LoadLocal(i));
1917        self.emit(Op::LoadLocal(list)); self.emit(Op::GetListLen);
1918        self.emit(Op::IntLt);
1919        let j_exit_l = self.code.len();
1920        self.emit(Op::JumpIfNot(0));
1921
1922        // out = out ++ [list[i]]
1923        self.emit(Op::LoadLocal(out));
1924        self.emit(Op::LoadLocal(list));
1925        self.emit(Op::LoadLocal(i));
1926        self.emit(Op::GetListElemDyn);
1927        self.emit(Op::ListAppend);
1928        self.emit(Op::StoreLocal(out));
1929
1930        let one = self.pool.int(1);
1931        // i = i + 1
1932        self.emit(Op::LoadLocal(i));
1933        self.emit(Op::PushConst(one));
1934        self.emit(Op::IntAdd);
1935        self.emit(Op::StoreLocal(i));
1936        // cnt = cnt + 1
1937        self.emit(Op::LoadLocal(cnt));
1938        self.emit(Op::PushConst(one));
1939        self.emit(Op::IntAdd);
1940        self.emit(Op::StoreLocal(cnt));
1941
1942        let jback = self.code.len();
1943        self.emit(Op::Jump((loop_top as i32) - (jback as i32 + 1)));
1944
1945        let exit_t = self.code.len() as i32;
1946        if let Op::JumpIfNot(off) = &mut self.code[j_exit_n] { *off = exit_t - (j_exit_n as i32 + 1); }
1947        if let Op::JumpIfNot(off) = &mut self.code[j_exit_l] { *off = exit_t - (j_exit_l as i32 + 1); }
1948
1949        // return new __IterEager(out, 0)
1950        self.emit(Op::LoadLocal(out));
1951        self.emit(Op::PushConst(zero));
1952        let eager_v = self.pool.variant("__IterEager");
1953        self.emit(Op::MakeVariant { name_idx: eager_v, arity: 2 });
1954    }
1955
1956    /// `iter.skip(it, n)` — advance cursor by n (or to end), return new Iter.
1957    fn emit_iter_skip(&mut self, args: &[a::CExpr]) {
1958        self.compile_expr(&args[0], false);
1959        let it   = self.alloc_local("__isk_it");
1960        self.emit(Op::StoreLocal(it));
1961
1962        self.compile_expr(&args[1], false);
1963        let n    = self.alloc_local("__isk_n");
1964        self.emit(Op::StoreLocal(n));
1965
1966        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(0));
1967        let list = self.alloc_local("__isk_list");
1968        self.emit(Op::StoreLocal(list));
1969
1970        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(1));
1971        let idx  = self.alloc_local("__isk_idx");
1972        self.emit(Op::StoreLocal(idx));
1973
1974        // raw = idx + n
1975        self.emit(Op::LoadLocal(idx));
1976        self.emit(Op::LoadLocal(n));
1977        self.emit(Op::IntAdd);
1978        let raw  = self.alloc_local("__isk_raw");
1979        self.emit(Op::StoreLocal(raw));
1980
1981        // new_idx = if raw < len then raw else len
1982        self.emit(Op::LoadLocal(raw));
1983        self.emit(Op::LoadLocal(list)); self.emit(Op::GetListLen);
1984        self.emit(Op::IntLt);
1985        let j_use_raw = self.code.len();
1986        self.emit(Op::JumpIf(0));
1987
1988        // use len
1989        self.emit(Op::LoadLocal(list)); self.emit(Op::GetListLen);
1990        let j_end = self.code.len();
1991        self.emit(Op::Jump(0));
1992
1993        // use raw
1994        let raw_t = self.code.len() as i32;
1995        if let Op::JumpIf(off) = &mut self.code[j_use_raw] { *off = raw_t - (j_use_raw as i32 + 1); }
1996        self.emit(Op::LoadLocal(raw));
1997
1998        let end_t = self.code.len() as i32;
1999        if let Op::Jump(off) = &mut self.code[j_end] { *off = end_t - (j_end as i32 + 1); }
2000
2001        // new_idx on stack; build new __IterEager(list, new_idx)
2002        let new_idx = self.alloc_local("__isk_ni");
2003        self.emit(Op::StoreLocal(new_idx));
2004        self.emit(Op::LoadLocal(list));
2005        self.emit(Op::LoadLocal(new_idx));
2006        let eager_v = self.pool.variant("__IterEager");
2007        self.emit(Op::MakeVariant { name_idx: eager_v, arity: 2 });
2008    }
2009
2010    /// `iter.to_list(it)` — materialise remaining elements into a List.
2011    ///
2012    /// Dispatches on the iter variant (#376):
2013    /// - `__IterLazy`: repeatedly call `step(seed)`; on `Some((t, s'))` append
2014    ///   `t` and continue with `s'`; on `None` stop. May hang on truly
2015    ///   infinite producers — that's documented as a v1 limitation, the
2016    ///   step-limit-protected caller is what catches misuse.
2017    /// - `__IterEager`: slice the backing list from `idx` onward (O(n) walk).
2018    fn emit_iter_to_list(&mut self, args: &[a::CExpr]) {
2019        self.compile_expr(&args[0], false);
2020        let it = self.alloc_local("__itl_it");
2021        self.emit(Op::StoreLocal(it));
2022
2023        // Build the output list up-front, shared across both paths.
2024        self.emit(Op::MakeList(0));
2025        let out = self.alloc_local("__itl_out");
2026        self.emit(Op::StoreLocal(out));
2027
2028        // Dispatch on variant tag.
2029        self.emit(Op::LoadLocal(it));
2030        let lazy_name = self.pool.variant("__IterLazy");
2031        self.emit(Op::TestVariant(lazy_name));
2032        let j_to_eager = self.code.len();
2033        self.emit(Op::JumpIfNot(0));
2034
2035        // ── lazy path ─────────────────────────────────────────────────
2036        // seed and step closure live in locals; we update seed each iteration.
2037        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(0));
2038        let seed = self.alloc_local("__itl_seed");
2039        self.emit(Op::StoreLocal(seed));
2040
2041        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(1));
2042        let step = self.alloc_local("__itl_step");
2043        self.emit(Op::StoreLocal(step));
2044
2045        let lazy_loop = self.code.len();
2046        let nid_lazy = self.pool.node_id("n_iter_to_list_lazy");
2047        self.emit(Op::LoadLocal(step));
2048        self.emit(Op::LoadLocal(seed));
2049        self.emit(Op::CallClosure { arity: 1, node_id_idx: nid_lazy });
2050        let opt = self.alloc_local("__itl_opt");
2051        self.emit(Op::StoreLocal(opt));
2052
2053        // If None, drop out of the lazy loop.
2054        self.emit(Op::LoadLocal(opt));
2055        let some_name = self.pool.variant("Some");
2056        self.emit(Op::TestVariant(some_name));
2057        let j_lazy_exit = self.code.len();
2058        self.emit(Op::JumpIfNot(0));
2059
2060        // Some((t, new_seed)): append t to out, replace seed.
2061        self.emit(Op::LoadLocal(opt));
2062        self.emit(Op::GetVariantArg(0));
2063        let pair = self.alloc_local("__itl_pair");
2064        self.emit(Op::StoreLocal(pair));
2065
2066        self.emit(Op::LoadLocal(out));
2067        self.emit(Op::LoadLocal(pair)); self.emit(Op::GetElem(0));
2068        self.emit(Op::ListAppend);
2069        self.emit(Op::StoreLocal(out));
2070
2071        self.emit(Op::LoadLocal(pair)); self.emit(Op::GetElem(1));
2072        self.emit(Op::StoreLocal(seed));
2073
2074        let jback_lazy = self.code.len();
2075        self.emit(Op::Jump((lazy_loop as i32) - (jback_lazy as i32 + 1)));
2076
2077        let lazy_exit_t = self.code.len() as i32;
2078        if let Op::JumpIfNot(off) = &mut self.code[j_lazy_exit] {
2079            *off = lazy_exit_t - (j_lazy_exit as i32 + 1);
2080        }
2081        let j_after_lazy = self.code.len();
2082        self.emit(Op::Jump(0));
2083
2084        // ── eager path ────────────────────────────────────────────────
2085        let eager_t = self.code.len() as i32;
2086        if let Op::JumpIfNot(off) = &mut self.code[j_to_eager] {
2087            *off = eager_t - (j_to_eager as i32 + 1);
2088        }
2089
2090        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(0));
2091        let list = self.alloc_local("__itl_list");
2092        self.emit(Op::StoreLocal(list));
2093
2094        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(1));
2095        let i = self.alloc_local("__itl_i");
2096        self.emit(Op::StoreLocal(i));
2097
2098        let loop_top = self.code.len();
2099        self.emit(Op::LoadLocal(i));
2100        self.emit(Op::LoadLocal(list)); self.emit(Op::GetListLen);
2101        self.emit(Op::IntLt);
2102        let j_exit = self.code.len();
2103        self.emit(Op::JumpIfNot(0));
2104
2105        self.emit(Op::LoadLocal(out));
2106        self.emit(Op::LoadLocal(list));
2107        self.emit(Op::LoadLocal(i));
2108        self.emit(Op::GetListElemDyn);
2109        self.emit(Op::ListAppend);
2110        self.emit(Op::StoreLocal(out));
2111
2112        self.emit(Op::LoadLocal(i));
2113        let one = self.pool.int(1);
2114        self.emit(Op::PushConst(one));
2115        self.emit(Op::IntAdd);
2116        self.emit(Op::StoreLocal(i));
2117
2118        let jback = self.code.len();
2119        self.emit(Op::Jump((loop_top as i32) - (jback as i32 + 1)));
2120
2121        let exit_t = self.code.len() as i32;
2122        if let Op::JumpIfNot(off) = &mut self.code[j_exit] {
2123            *off = exit_t - (j_exit as i32 + 1);
2124        }
2125
2126        // Converge: lazy path falls through here too.
2127        let converge = self.code.len() as i32;
2128        if let Op::Jump(off) = &mut self.code[j_after_lazy] {
2129            *off = converge - (j_after_lazy as i32 + 1);
2130        }
2131        self.emit(Op::LoadLocal(out));
2132    }
2133
2134    /// `iter.map(it, f)` — apply `f` to each remaining element; returns new Iter.
2135    fn emit_iter_map(&mut self, args: &[a::CExpr]) {
2136        self.compile_expr(&args[0], false);
2137        let it   = self.alloc_local("__im_it");
2138        self.emit(Op::StoreLocal(it));
2139
2140        self.compile_expr(&args[1], false);
2141        let f    = self.alloc_local("__im_f");
2142        self.emit(Op::StoreLocal(f));
2143
2144        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(0));
2145        let list = self.alloc_local("__im_list");
2146        self.emit(Op::StoreLocal(list));
2147
2148        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(1));
2149        let i    = self.alloc_local("__im_i");
2150        self.emit(Op::StoreLocal(i));
2151
2152        self.emit(Op::MakeList(0));
2153        let out  = self.alloc_local("__im_out");
2154        self.emit(Op::StoreLocal(out));
2155
2156        let loop_top = self.code.len();
2157        self.emit(Op::LoadLocal(i));
2158        self.emit(Op::LoadLocal(list)); self.emit(Op::GetListLen);
2159        self.emit(Op::IntLt);
2160        let j_exit = self.code.len();
2161        self.emit(Op::JumpIfNot(0));
2162
2163        let nid = self.pool.node_id("n_iter_map");
2164        self.emit(Op::LoadLocal(out));
2165        self.emit(Op::LoadLocal(f));
2166        self.emit(Op::LoadLocal(list));
2167        self.emit(Op::LoadLocal(i));
2168        self.emit(Op::GetListElemDyn);
2169        self.emit(Op::CallClosure { arity: 1, node_id_idx: nid });
2170        self.emit(Op::ListAppend);
2171        self.emit(Op::StoreLocal(out));
2172
2173        self.emit(Op::LoadLocal(i));
2174        let one = self.pool.int(1);
2175        self.emit(Op::PushConst(one));
2176        self.emit(Op::IntAdd);
2177        self.emit(Op::StoreLocal(i));
2178
2179        let jback = self.code.len();
2180        self.emit(Op::Jump((loop_top as i32) - (jback as i32 + 1)));
2181
2182        let exit_t = self.code.len() as i32;
2183        if let Op::JumpIfNot(off) = &mut self.code[j_exit] { *off = exit_t - (j_exit as i32 + 1); }
2184
2185        let zero = self.pool.int(0);
2186        self.emit(Op::LoadLocal(out));
2187        self.emit(Op::PushConst(zero));
2188        let eager_v = self.pool.variant("__IterEager");
2189        self.emit(Op::MakeVariant { name_idx: eager_v, arity: 2 });
2190    }
2191
2192    /// `iter.filter(it, pred)` — keep elements where pred is true; returns new Iter.
2193    fn emit_iter_filter(&mut self, args: &[a::CExpr]) {
2194        self.compile_expr(&args[0], false);
2195        let it   = self.alloc_local("__if_it");
2196        self.emit(Op::StoreLocal(it));
2197
2198        self.compile_expr(&args[1], false);
2199        let f    = self.alloc_local("__if_f");
2200        self.emit(Op::StoreLocal(f));
2201
2202        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(0));
2203        let list = self.alloc_local("__if_list");
2204        self.emit(Op::StoreLocal(list));
2205
2206        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(1));
2207        let i    = self.alloc_local("__if_i");
2208        self.emit(Op::StoreLocal(i));
2209
2210        self.emit(Op::MakeList(0));
2211        let out  = self.alloc_local("__if_out");
2212        self.emit(Op::StoreLocal(out));
2213
2214        let loop_top = self.code.len();
2215        self.emit(Op::LoadLocal(i));
2216        self.emit(Op::LoadLocal(list)); self.emit(Op::GetListLen);
2217        self.emit(Op::IntLt);
2218        let j_exit = self.code.len();
2219        self.emit(Op::JumpIfNot(0));
2220
2221        // elem := list[i]
2222        self.emit(Op::LoadLocal(list));
2223        self.emit(Op::LoadLocal(i));
2224        self.emit(Op::GetListElemDyn);
2225        let x    = self.alloc_local("__if_x");
2226        self.emit(Op::StoreLocal(x));
2227
2228        let nid = self.pool.node_id("n_iter_filter");
2229        self.emit(Op::LoadLocal(f));
2230        self.emit(Op::LoadLocal(x));
2231        self.emit(Op::CallClosure { arity: 1, node_id_idx: nid });
2232        let j_skip = self.code.len();
2233        self.emit(Op::JumpIfNot(0));
2234
2235        self.emit(Op::LoadLocal(out));
2236        self.emit(Op::LoadLocal(x));
2237        self.emit(Op::ListAppend);
2238        self.emit(Op::StoreLocal(out));
2239
2240        let skip_t = self.code.len() as i32;
2241        if let Op::JumpIfNot(off) = &mut self.code[j_skip] { *off = skip_t - (j_skip as i32 + 1); }
2242
2243        self.emit(Op::LoadLocal(i));
2244        let one = self.pool.int(1);
2245        self.emit(Op::PushConst(one));
2246        self.emit(Op::IntAdd);
2247        self.emit(Op::StoreLocal(i));
2248
2249        let jback = self.code.len();
2250        self.emit(Op::Jump((loop_top as i32) - (jback as i32 + 1)));
2251
2252        let exit_t = self.code.len() as i32;
2253        if let Op::JumpIfNot(off) = &mut self.code[j_exit] { *off = exit_t - (j_exit as i32 + 1); }
2254
2255        let zero = self.pool.int(0);
2256        self.emit(Op::LoadLocal(out));
2257        self.emit(Op::PushConst(zero));
2258        let eager_v = self.pool.variant("__IterEager");
2259        self.emit(Op::MakeVariant { name_idx: eager_v, arity: 2 });
2260    }
2261
2262    /// `iter.fold(it, init, f)` — left fold over remaining elements.
2263    fn emit_iter_fold(&mut self, args: &[a::CExpr]) {
2264        self.compile_expr(&args[0], false);
2265        let it   = self.alloc_local("__ifo_it");
2266        self.emit(Op::StoreLocal(it));
2267
2268        self.compile_expr(&args[1], false);
2269        let acc  = self.alloc_local("__ifo_acc");
2270        self.emit(Op::StoreLocal(acc));
2271
2272        self.compile_expr(&args[2], false);
2273        let f    = self.alloc_local("__ifo_f");
2274        self.emit(Op::StoreLocal(f));
2275
2276        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(0));
2277        let list = self.alloc_local("__ifo_list");
2278        self.emit(Op::StoreLocal(list));
2279
2280        self.emit(Op::LoadLocal(it)); self.emit(Op::GetVariantArg(1));
2281        let i    = self.alloc_local("__ifo_i");
2282        self.emit(Op::StoreLocal(i));
2283
2284        let loop_top = self.code.len();
2285        self.emit(Op::LoadLocal(i));
2286        self.emit(Op::LoadLocal(list)); self.emit(Op::GetListLen);
2287        self.emit(Op::IntLt);
2288        let j_exit = self.code.len();
2289        self.emit(Op::JumpIfNot(0));
2290
2291        let nid = self.pool.node_id("n_iter_fold");
2292        self.emit(Op::LoadLocal(f));
2293        self.emit(Op::LoadLocal(acc));
2294        self.emit(Op::LoadLocal(list));
2295        self.emit(Op::LoadLocal(i));
2296        self.emit(Op::GetListElemDyn);
2297        self.emit(Op::CallClosure { arity: 2, node_id_idx: nid });
2298        self.emit(Op::StoreLocal(acc));
2299
2300        self.emit(Op::LoadLocal(i));
2301        let one = self.pool.int(1);
2302        self.emit(Op::PushConst(one));
2303        self.emit(Op::IntAdd);
2304        self.emit(Op::StoreLocal(i));
2305
2306        let jback = self.code.len();
2307        self.emit(Op::Jump((loop_top as i32) - (jback as i32 + 1)));
2308
2309        let exit_t = self.code.len() as i32;
2310        if let Op::JumpIfNot(off) = &mut self.code[j_exit] { *off = exit_t - (j_exit as i32 + 1); }
2311        self.emit(Op::LoadLocal(acc));
2312    }
2313
2314    /// `map.fold(m, init, f)` — left fold over `Map[K, V]` entries with a
2315    /// three-arg combiner `f(acc, k, v)`. Iteration order matches
2316    /// `map.entries` (BTreeMap-sorted by key). Materializes the entry
2317    /// list once via the runtime's `("map", "entries")` op, then runs
2318    /// the same inline loop as `list.fold`.
2319    fn emit_map_fold(&mut self, args: &[a::CExpr], node_id_idx: u32) {
2320        // xs := map.entries(m)
2321        self.compile_expr(&args[0], false);
2322        let map_kind = self.pool.str("map");
2323        let entries_op = self.pool.str("entries");
2324        self.emit(Op::EffectCall {
2325            kind_idx: map_kind,
2326            op_idx: entries_op,
2327            arity: 1,
2328            node_id_idx,
2329        });
2330        let xs = self.alloc_local("__mf_xs");
2331        self.emit(Op::StoreLocal(xs));
2332
2333        // acc := init
2334        self.compile_expr(&args[1], false);
2335        let acc = self.alloc_local("__mf_acc");
2336        self.emit(Op::StoreLocal(acc));
2337
2338        // f := <closure>
2339        self.compile_expr(&args[2], false);
2340        let f = self.alloc_local("__mf_f");
2341        self.emit(Op::StoreLocal(f));
2342
2343        // i := 0
2344        let zero = self.pool.int(0);
2345        self.emit(Op::PushConst(zero));
2346        let i = self.alloc_local("__mf_i");
2347        self.emit(Op::StoreLocal(i));
2348
2349        // loop_top: while i < len(xs)
2350        let loop_top = self.code.len();
2351        self.emit(Op::LoadLocal(i));
2352        self.emit(Op::LoadLocal(xs));
2353        self.emit(Op::GetListLen);
2354        self.emit(Op::IntLt);
2355        let j_exit = self.code.len();
2356        self.emit(Op::JumpIfNot(0));
2357
2358        // pair := xs[i]
2359        self.emit(Op::LoadLocal(xs));
2360        self.emit(Op::LoadLocal(i));
2361        self.emit(Op::GetListElemDyn);
2362        let pair = self.alloc_local("__mf_pair");
2363        self.emit(Op::StoreLocal(pair));
2364
2365        // acc := f(acc, pair.0, pair.1)
2366        let nid = self.pool.node_id("n_map_fold");
2367        self.emit(Op::LoadLocal(f));
2368        self.emit(Op::LoadLocal(acc));
2369        self.emit(Op::LoadLocal(pair));
2370        self.emit(Op::GetElem(0));
2371        self.emit(Op::LoadLocal(pair));
2372        self.emit(Op::GetElem(1));
2373        self.emit(Op::CallClosure { arity: 3, node_id_idx: nid });
2374        self.emit(Op::StoreLocal(acc));
2375
2376        // i := i + 1
2377        self.emit(Op::LoadLocal(i));
2378        let one = self.pool.int(1);
2379        self.emit(Op::PushConst(one));
2380        self.emit(Op::IntAdd);
2381        self.emit(Op::StoreLocal(i));
2382
2383        let jump_back = self.code.len();
2384        let back = (loop_top as i32) - (jump_back as i32 + 1);
2385        self.emit(Op::Jump(back));
2386
2387        let exit_target = self.code.len() as i32;
2388        if let Op::JumpIfNot(off) = &mut self.code[j_exit] {
2389            *off = exit_target - (j_exit as i32 + 1);
2390        }
2391        self.emit(Op::LoadLocal(acc));
2392    }
2393
2394    /// Inline pattern: `<module>.map(v, f)` and friends.
2395    /// `wrap_with`: variant tag whose payload triggers the call (Ok / Some / Err).
2396    /// `wrap_result`: if true, wrap the closure's result back in `wrap_with`
2397    /// (map shape); if false, expect the closure to return a wrapped value
2398    /// itself (and_then shape).
2399    fn emit_variant_map(
2400        &mut self,
2401        args: &[a::CExpr],
2402        wrap_with: &str,
2403        wrap_result: bool,
2404    ) {
2405        // args[0] = the wrapped value (Result/Option), args[1] = closure
2406        let wrap_idx = self.pool.variant(wrap_with);
2407
2408        // Compile and store the value into a local, evaluate closure on top of stack.
2409        self.compile_expr(&args[0], false);
2410        let val_slot = self.alloc_local("__hov");
2411        self.emit(Op::StoreLocal(val_slot));
2412
2413        self.compile_expr(&args[1], false);
2414        let f_slot = self.alloc_local("__hof");
2415        self.emit(Op::StoreLocal(f_slot));
2416
2417        // Stack discipline:
2418        //   load val ⇒ [v]
2419        //   dup     ⇒ [v, v]
2420        //   test    ⇒ [v, Bool]
2421        //   jumpifnot ⇒ [v]
2422        // Both branches end with [v] before the branch body.
2423        self.emit(Op::LoadLocal(val_slot));
2424        self.emit(Op::Dup);
2425        self.emit(Op::TestVariant(wrap_idx));
2426        let j_skip = self.code.len();
2427        self.emit(Op::JumpIfNot(0));
2428
2429        // Matched arm: extract payload, call closure on it.
2430        self.emit(Op::GetVariantArg(0));
2431        let arg_slot = self.alloc_local("__hov_arg");
2432        self.emit(Op::StoreLocal(arg_slot));
2433        self.emit(Op::LoadLocal(f_slot));
2434        self.emit(Op::LoadLocal(arg_slot));
2435        let nid = self.pool.node_id("n_hov");
2436        self.emit(Op::CallClosure { arity: 1, node_id_idx: nid });
2437        if wrap_result {
2438            self.emit(Op::MakeVariant { name_idx: wrap_idx, arity: 1 });
2439        }
2440        let j_end = self.code.len();
2441        self.emit(Op::Jump(0));
2442
2443        // Skip arm: stack already has [v] from the failed Dup; nothing to do.
2444        let skip_target = self.code.len() as i32;
2445        if let Op::JumpIfNot(off) = &mut self.code[j_skip] {
2446            *off = skip_target - (j_skip as i32 + 1);
2447        }
2448
2449        let end_target = self.code.len() as i32;
2450        if let Op::Jump(off) = &mut self.code[j_end] {
2451            *off = end_target - (j_end as i32 + 1);
2452        }
2453    }
2454
2455    /// Sibling of `emit_variant_map` for the recovery combinators
2456    /// `result.or_else` and `option.or_else`. Differences from
2457    /// `emit_variant_map`:
2458    ///   - matches on the *negative* variant (`Err` / `None`)
2459    ///   - the closure's result becomes the call's result directly,
2460    ///     with no wrapping (it is itself a `Result` / `Option`)
2461    ///   - `option.or_else`'s closure takes zero args (`None` has no
2462    ///     payload to forward)
2463    fn emit_variant_or_else(
2464        &mut self,
2465        args: &[a::CExpr],
2466        match_on: &str,
2467        closure_arity: u16,
2468    ) {
2469        let match_idx = self.pool.variant(match_on);
2470
2471        self.compile_expr(&args[0], false);
2472        let val_slot = self.alloc_local("__hoe");
2473        self.emit(Op::StoreLocal(val_slot));
2474
2475        self.compile_expr(&args[1], false);
2476        let f_slot = self.alloc_local("__hoe_f");
2477        self.emit(Op::StoreLocal(f_slot));
2478
2479        // Stack discipline mirrors emit_variant_map:
2480        //   load val      ⇒ [v]
2481        //   dup           ⇒ [v, v]
2482        //   test          ⇒ [v, Bool]
2483        //   jumpifnot     ⇒ [v]
2484        // The unmatched arm leaves [v] (Ok/Some unchanged); the
2485        // matched arm pops [v] and pushes the closure's result.
2486        self.emit(Op::LoadLocal(val_slot));
2487        self.emit(Op::Dup);
2488        self.emit(Op::TestVariant(match_idx));
2489        let j_skip = self.code.len();
2490        self.emit(Op::JumpIfNot(0));
2491
2492        // Matched arm: pop the duplicate left on the stack,
2493        // then call the closure with whatever payload it expects.
2494        self.emit(Op::Pop);
2495        self.emit(Op::LoadLocal(f_slot));
2496        if closure_arity == 1 {
2497            self.emit(Op::LoadLocal(val_slot));
2498            self.emit(Op::GetVariantArg(0));
2499        }
2500        let nid = self.pool.node_id("n_hoe");
2501        self.emit(Op::CallClosure { arity: closure_arity, node_id_idx: nid });
2502
2503        let j_end = self.code.len();
2504        self.emit(Op::Jump(0));
2505
2506        // Unmatched arm: stack already holds [v]; nothing to do.
2507        let skip_target = self.code.len() as i32;
2508        if let Op::JumpIfNot(off) = &mut self.code[j_skip] {
2509            *off = skip_target - (j_skip as i32 + 1);
2510        }
2511
2512        let end_target = self.code.len() as i32;
2513        if let Op::Jump(off) = &mut self.code[j_end] {
2514            *off = end_target - (j_end as i32 + 1);
2515        }
2516    }
2517
2518    /// `option.unwrap_or_else(opt, f)` — lazy default via zero-arg thunk.
2519    ///   Some(x) → x          (unwrap; no wrapping)
2520    ///   None    → f()        (call thunk; return its result directly)
2521    fn emit_option_unwrap_or_else(&mut self, args: &[a::CExpr]) {
2522        let some_idx = self.pool.variant("Some");
2523
2524        // Compile opt and f; stash both so they're accessible on both arms.
2525        self.compile_expr(&args[0], false);
2526        let val_slot = self.alloc_local("__uoe_val");
2527        self.emit(Op::StoreLocal(val_slot));
2528
2529        self.compile_expr(&args[1], false);
2530        let f_slot = self.alloc_local("__uoe_f");
2531        self.emit(Op::StoreLocal(f_slot));
2532
2533        // Test whether opt is Some.
2534        //   load val ⇒ [v]
2535        //   dup      ⇒ [v, v]
2536        //   test     ⇒ [v, Bool]
2537        //   jumpifnot → None arm
2538        self.emit(Op::LoadLocal(val_slot));
2539        self.emit(Op::Dup);
2540        self.emit(Op::TestVariant(some_idx));
2541        let j_none = self.code.len();
2542        self.emit(Op::JumpIfNot(0));
2543
2544        // Some arm: extract the payload from [v] left on the stack.
2545        self.emit(Op::GetVariantArg(0));
2546        let j_end = self.code.len();
2547        self.emit(Op::Jump(0));
2548
2549        // None arm: pop the [v] duplicate, call the thunk.
2550        let none_target = self.code.len() as i32;
2551        if let Op::JumpIfNot(off) = &mut self.code[j_none] {
2552            *off = none_target - (j_none as i32 + 1);
2553        }
2554        self.emit(Op::Pop);
2555        self.emit(Op::LoadLocal(f_slot));
2556        let nid = self.pool.node_id("n_uoe");
2557        self.emit(Op::CallClosure { arity: 0, node_id_idx: nid });
2558
2559        // Patch jump-to-end from Some arm.
2560        let end_target = self.code.len() as i32;
2561        if let Op::Jump(off) = &mut self.code[j_end] {
2562            *off = end_target - (j_end as i32 + 1);
2563        }
2564    }
2565
2566    // ---- std.flow trampolines ----------------------------------------
2567    //
2568    // Each flow.<op>(c1, c2, ...) call site:
2569    //   1. compiles its closure args and leaves them on the stack
2570    //   2. registers a fresh "trampoline" Function whose body invokes
2571    //      those captured closures appropriately
2572    //   3. emits MakeClosure { fn_id: trampoline, capture_count: N }
2573    //
2574    // The trampoline's parameter layout is [capture_0, ..., capture_{N-1},
2575    // arg_0, ...]: captures first, the closure's own args after.
2576
2577    /// Allocate a fresh fn_id for a trampoline and install its bytecode.
2578    /// Trampolines are the one Function-creation path that already has
2579    /// the body in hand at install time (top-level fns and lambdas have
2580    /// it filled in later), so we compute `body_hash` immediately. The
2581    /// final hash pass at the end of `compile_program` is a no-op here.
2582    fn install_trampoline(&mut self, name: &str, arity: u16, locals_count: u16, code: Vec<Op>) -> u32 {
2583        let fn_id = self.next_fn_id.len() as u32;
2584        let body_hash = crate::program::compute_body_hash(
2585            arity, locals_count, &code, &self.pool.record_shapes);
2586        self.next_fn_id.push(Function {
2587            name: name.into(),
2588            arity,
2589            locals_count,
2590            code,
2591            effects: Vec::new(),
2592            body_hash,
2593            // Trampolines (flow.sequential / parallel / etc.) don't
2594            // surface refined params at this layer.
2595            refinements: Vec::new(),
2596            // Trampolines never emit `Op::GetField` — they're pure
2597            // scaffolding. Leaving this at 0 means the VM allocates
2598            // an empty IC slot.
2599            field_ic_sites: 0,
2600        });
2601        fn_id
2602    }
2603
2604    /// `flow.sequential(f, g)` returns a closure `(x) -> g(f(x))`.
2605    fn emit_flow_sequential(&mut self, args: &[a::CExpr]) {
2606        // Push f, g; build the trampoline closure with 2 captures.
2607        self.compile_expr(&args[0], false);
2608        self.compile_expr(&args[1], false);
2609        let nid = self.pool.node_id("n_flow_sequential");
2610        let code = vec![
2611            // Locals: [f=0, g=1, x=2]
2612            Op::LoadLocal(0),                                  // push f
2613            Op::LoadLocal(2),                                  // push x
2614            Op::CallClosure { arity: 1, node_id_idx: nid },    // r = f(x)
2615            // stack: [r]
2616            Op::StoreLocal(3),                                 // tmp = r
2617            Op::LoadLocal(1),                                  // push g
2618            Op::LoadLocal(3),                                  // push tmp
2619            Op::CallClosure { arity: 1, node_id_idx: nid },    // r = g(tmp)
2620            Op::Return,
2621        ];
2622        let fn_id = self.install_trampoline("__flow_sequential", 3, 4, code);
2623        self.emit(Op::MakeClosure { fn_id, capture_count: 2 });
2624    }
2625
2626    /// `flow.parallel(fa, fb)` returns a closure `() -> (fa(), fb())`.
2627    /// Implementation is sequential: each function is called in order
2628    /// and the results are packed into a 2-tuple. The spec (§11.2)
2629    /// allows the runtime to apply true parallelism here; that needs
2630    /// a thread-safe handler split and is left to a follow-up. The
2631    /// signature is what users program against — sequential vs threaded
2632    /// is an implementation detail invisible to the type system.
2633    fn emit_flow_parallel(&mut self, args: &[a::CExpr]) {
2634        // Push fa, fb; build a 0-arg trampoline closure with 2 captures.
2635        self.compile_expr(&args[0], false);
2636        self.compile_expr(&args[1], false);
2637        let nid = self.pool.node_id("n_flow_parallel");
2638        let code = vec![
2639            // Locals: [fa=0, fb=1]
2640            Op::LoadLocal(0),                                  // push fa
2641            Op::CallClosure { arity: 0, node_id_idx: nid },    // a = fa()
2642            Op::LoadLocal(1),                                  // push fb
2643            Op::CallClosure { arity: 0, node_id_idx: nid },    // b = fb()
2644            Op::MakeTuple(2),                                  // (a, b)
2645            Op::Return,
2646        ];
2647        let fn_id = self.install_trampoline("__flow_parallel", 2, 2, code);
2648        self.emit(Op::MakeClosure { fn_id, capture_count: 2 });
2649    }
2650
2651    /// `flow.parallel_list(actions)` runs each 0-arg closure in `actions`
2652    /// and returns the results as a list in input order. Variadic
2653    /// counterpart to `flow.parallel`. Sequential under the hood — the
2654    /// spec (§11.2) reserves true threading for a future scheduler.
2655    /// Compiled inline (mirrors `list.map`) so closure args can flow
2656    /// through `CallClosure` without a heap-allocated trampoline.
2657    fn emit_flow_parallel_list(&mut self, args: &[a::CExpr]) {
2658        // xs := actions
2659        self.compile_expr(&args[0], false);
2660        let xs = self.alloc_local("__fpl_xs");
2661        self.emit(Op::StoreLocal(xs));
2662
2663        // out := []
2664        self.emit(Op::MakeList(0));
2665        let out = self.alloc_local("__fpl_out");
2666        self.emit(Op::StoreLocal(out));
2667
2668        // i := 0
2669        let zero = self.pool.int(0);
2670        self.emit(Op::PushConst(zero));
2671        let i = self.alloc_local("__fpl_i");
2672        self.emit(Op::StoreLocal(i));
2673
2674        // loop_top: while i < len(xs) { ... }
2675        let loop_top = self.code.len();
2676        self.emit(Op::LoadLocal(i));
2677        self.emit(Op::LoadLocal(xs));
2678        self.emit(Op::GetListLen);
2679        self.emit(Op::IntLt);
2680        let j_exit = self.code.len();
2681        self.emit(Op::JumpIfNot(0));
2682
2683        // body: out := out ++ [xs[i]()]
2684        let nid = self.pool.node_id("n_flow_parallel_list");
2685        self.emit(Op::LoadLocal(out));
2686        self.emit(Op::LoadLocal(xs));
2687        self.emit(Op::LoadLocal(i));
2688        self.emit(Op::GetListElemDyn);
2689        self.emit(Op::CallClosure { arity: 0, node_id_idx: nid });
2690        self.emit(Op::ListAppend);
2691        self.emit(Op::StoreLocal(out));
2692
2693        // i := i + 1
2694        self.emit(Op::LoadLocal(i));
2695        let one = self.pool.int(1);
2696        self.emit(Op::PushConst(one));
2697        self.emit(Op::IntAdd);
2698        self.emit(Op::StoreLocal(i));
2699
2700        // jump back
2701        let jump_back = self.code.len();
2702        let back = (loop_top as i32) - (jump_back as i32 + 1);
2703        self.emit(Op::Jump(back));
2704
2705        // exit: patch j_exit, push out
2706        let exit_target = self.code.len() as i32;
2707        if let Op::JumpIfNot(off) = &mut self.code[j_exit] {
2708            *off = exit_target - (j_exit as i32 + 1);
2709        }
2710        self.emit(Op::LoadLocal(out));
2711    }
2712
2713    /// `flow.branch(cond, t, f)` returns a closure `(x) -> if cond(x) then t(x) else f(x)`.
2714    fn emit_flow_branch(&mut self, args: &[a::CExpr]) {
2715        self.compile_expr(&args[0], false);
2716        self.compile_expr(&args[1], false);
2717        self.compile_expr(&args[2], false);
2718        let nid = self.pool.node_id("n_flow_branch");
2719        let mut code = vec![
2720            // Locals: [cond=0, t=1, f=2, x=3]
2721            Op::LoadLocal(0),                               // push cond
2722            Op::LoadLocal(3),                               // push x
2723            Op::CallClosure { arity: 1, node_id_idx: nid }, // bool
2724        ];
2725        let j_false = code.len();
2726        code.push(Op::JumpIfNot(0));                        // patched
2727        // true arm: t(x)
2728        code.push(Op::LoadLocal(1));
2729        code.push(Op::LoadLocal(3));
2730        code.push(Op::CallClosure { arity: 1, node_id_idx: nid });
2731        code.push(Op::Return);
2732        // false arm
2733        let false_target = code.len() as i32;
2734        if let Op::JumpIfNot(off) = &mut code[j_false] {
2735            *off = false_target - (j_false as i32 + 1);
2736        }
2737        code.push(Op::LoadLocal(2));
2738        code.push(Op::LoadLocal(3));
2739        code.push(Op::CallClosure { arity: 1, node_id_idx: nid });
2740        code.push(Op::Return);
2741
2742        let fn_id = self.install_trampoline("__flow_branch", 4, 4, code);
2743        self.emit(Op::MakeClosure { fn_id, capture_count: 3 });
2744    }
2745
2746    /// `flow.retry(f, max_attempts)` returns a closure `(x) -> Result[U, E]`
2747    /// that calls `f(x)` up to `max_attempts` times, returning the first
2748    /// `Ok` or the final `Err`.
2749    fn emit_flow_retry(&mut self, args: &[a::CExpr]) {
2750        self.compile_expr(&args[0], false);
2751        self.compile_expr(&args[1], false);
2752        let call_nid = self.pool.node_id("n_flow_retry");
2753        let ok_idx = self.pool.variant("Ok");
2754        let zero_const = self.pool.int(0);
2755        let one_const = self.pool.int(1);
2756        // Locals: [f=0, max=1, x=2, i=3, last=4]
2757        let mut code = vec![
2758            // i := 0
2759            Op::PushConst(zero_const),
2760            Op::StoreLocal(3),
2761        ];
2762        // loop_top: while i < max
2763        let loop_top = code.len() as i32;
2764        code.push(Op::LoadLocal(3));
2765        code.push(Op::LoadLocal(1));
2766        code.push(Op::IntLt);
2767        let j_done = code.len();
2768        code.push(Op::JumpIfNot(0));                       // patched
2769
2770        // body: r := f(x); last := r
2771        code.push(Op::LoadLocal(0));
2772        code.push(Op::LoadLocal(2));
2773        code.push(Op::CallClosure { arity: 1, node_id_idx: call_nid });
2774        code.push(Op::StoreLocal(4));
2775
2776        // Test variant Ok on last; if so, return last.
2777        code.push(Op::LoadLocal(4));
2778        code.push(Op::TestVariant(ok_idx));
2779        let j_was_err = code.len();
2780        code.push(Op::JumpIfNot(0));                       // patched: skip return
2781        code.push(Op::LoadLocal(4));
2782        code.push(Op::Return);
2783
2784        // was_err: i := i + 1; jump loop_top
2785        let was_err_target = code.len() as i32;
2786        if let Op::JumpIfNot(off) = &mut code[j_was_err] {
2787            *off = was_err_target - (j_was_err as i32 + 1);
2788        }
2789        code.push(Op::LoadLocal(3));
2790        code.push(Op::PushConst(one_const));
2791        code.push(Op::IntAdd);
2792        code.push(Op::StoreLocal(3));
2793        let pc_after_jump = code.len() as i32 + 1;
2794        code.push(Op::Jump(loop_top - pc_after_jump));
2795
2796        // done: return last (the final Err, or Unit if max=0).
2797        let done_target = code.len() as i32;
2798        if let Op::JumpIfNot(off) = &mut code[j_done] {
2799            *off = done_target - (j_done as i32 + 1);
2800        }
2801        code.push(Op::LoadLocal(4));
2802        code.push(Op::Return);
2803
2804        let fn_id = self.install_trampoline("__flow_retry", 3, 5, code);
2805        self.emit(Op::MakeClosure { fn_id, capture_count: 2 });
2806    }
2807
2808    /// `flow.retry_with_backoff(f, attempts, base_ms)` (#226). Variant
2809    /// of `flow.retry` that sleeps between attempts. The first
2810    /// attempt fires immediately; attempt k > 1 waits `base_ms *
2811    /// 2^(k-2)` ms before retrying. Sleeps go through
2812    /// `time.sleep_ms`, which is why the resulting closure carries
2813    /// `[time]` in its effect row even though the underlying `f` is
2814    /// pure.
2815    fn emit_flow_retry_with_backoff(&mut self, args: &[a::CExpr]) {
2816        // Push captures: f, max, base_ms. The trampoline takes one
2817        // call-time arg `x`, so capture_count = 3, arity = 4.
2818        self.compile_expr(&args[0], false);
2819        self.compile_expr(&args[1], false);
2820        self.compile_expr(&args[2], false);
2821        let call_nid    = self.pool.node_id("n_flow_retry_backoff");
2822        let sleep_nid   = self.pool.node_id("n_flow_retry_backoff_sleep");
2823        let kind_idx    = self.pool.str("time");
2824        let op_idx      = self.pool.str("sleep_ms");
2825        let ok_idx      = self.pool.variant("Ok");
2826        let zero_const  = self.pool.int(0);
2827        let one_const   = self.pool.int(1);
2828        let two_const   = self.pool.int(2);
2829        // Locals layout:
2830        //   0=f, 1=max, 2=base_ms (captures)
2831        //   3=x (arg)
2832        //   4=i, 5=last, 6=next_delay (working state)
2833        let mut code = vec![
2834            // next_delay := base_ms
2835            Op::LoadLocal(2),
2836            Op::StoreLocal(6),
2837            // i := 0
2838            Op::PushConst(zero_const),
2839            Op::StoreLocal(4),
2840        ];
2841
2842        let loop_top = code.len() as i32;
2843        // while i < max
2844        code.push(Op::LoadLocal(4));
2845        code.push(Op::LoadLocal(1));
2846        code.push(Op::IntLt);
2847        let j_done = code.len();
2848        code.push(Op::JumpIfNot(0)); // patched
2849
2850        // if i > 0: time.sleep_ms(next_delay); next_delay := next_delay * 2
2851        code.push(Op::PushConst(zero_const));
2852        code.push(Op::LoadLocal(4));
2853        code.push(Op::IntLt);                // 0 < i ?
2854        let j_no_sleep = code.len();
2855        code.push(Op::JumpIfNot(0));         // patched: skip the sleep block
2856        // Sleep
2857        code.push(Op::LoadLocal(6));         // arg = next_delay
2858        code.push(Op::EffectCall {
2859            kind_idx, op_idx, arity: 1, node_id_idx: sleep_nid,
2860        });
2861        code.push(Op::Pop);                  // discard the Unit result
2862        // next_delay := next_delay * 2
2863        code.push(Op::LoadLocal(6));
2864        code.push(Op::PushConst(two_const));
2865        code.push(Op::NumMul);
2866        code.push(Op::StoreLocal(6));
2867        // patch the no-sleep skip
2868        let after_sleep = code.len() as i32;
2869        if let Op::JumpIfNot(off) = &mut code[j_no_sleep] {
2870            *off = after_sleep - (j_no_sleep as i32 + 1);
2871        }
2872
2873        // last := f(x)
2874        code.push(Op::LoadLocal(0));
2875        code.push(Op::LoadLocal(3));
2876        code.push(Op::CallClosure { arity: 1, node_id_idx: call_nid });
2877        code.push(Op::StoreLocal(5));
2878
2879        // if Ok(last): return last
2880        code.push(Op::LoadLocal(5));
2881        code.push(Op::TestVariant(ok_idx));
2882        let j_was_err = code.len();
2883        code.push(Op::JumpIfNot(0)); // patched
2884        code.push(Op::LoadLocal(5));
2885        code.push(Op::Return);
2886
2887        // was_err: i := i + 1; jump loop_top
2888        let was_err_target = code.len() as i32;
2889        if let Op::JumpIfNot(off) = &mut code[j_was_err] {
2890            *off = was_err_target - (j_was_err as i32 + 1);
2891        }
2892        code.push(Op::LoadLocal(4));
2893        code.push(Op::PushConst(one_const));
2894        code.push(Op::IntAdd);
2895        code.push(Op::StoreLocal(4));
2896        let pc_after_jump = code.len() as i32 + 1;
2897        code.push(Op::Jump(loop_top - pc_after_jump));
2898
2899        // done: return last (the final Err, or Unit if max=0).
2900        let done_target = code.len() as i32;
2901        if let Op::JumpIfNot(off) = &mut code[j_done] {
2902            *off = done_target - (j_done as i32 + 1);
2903        }
2904        code.push(Op::LoadLocal(5));
2905        code.push(Op::Return);
2906
2907        let fn_id = self.install_trampoline("__flow_retry_backoff", 4, 7, code);
2908        self.emit(Op::MakeClosure { fn_id, capture_count: 3 });
2909    }
2910}
2911
2912/// Collect free variables referenced in `e` that are not in `bound`.
2913/// Mutates `bound` to track let/lambda introductions during the walk;
2914/// the caller's set is preserved on return because Rust's borrow rules
2915/// force us to clone for sub-scopes that rebind a name.
2916fn free_vars(e: &a::CExpr, bound: &mut std::collections::HashSet<String>, out: &mut Vec<String>) {
2917    match e {
2918        a::CExpr::Literal { .. } => {}
2919        a::CExpr::Var { name } => {
2920            if !bound.contains(name) && !out.contains(name) {
2921                out.push(name.clone());
2922            }
2923        }
2924        a::CExpr::Call { callee, args } => {
2925            free_vars(callee, bound, out);
2926            for a in args { free_vars(a, bound, out); }
2927        }
2928        a::CExpr::Let { name, value, body, .. } => {
2929            free_vars(value, bound, out);
2930            let was_bound = bound.contains(name);
2931            bound.insert(name.clone());
2932            free_vars(body, bound, out);
2933            if !was_bound { bound.remove(name); }
2934        }
2935        a::CExpr::Match { scrutinee, arms } => {
2936            free_vars(scrutinee, bound, out);
2937            for arm in arms {
2938                let mut local_bound = bound.clone();
2939                pattern_binders(&arm.pattern, &mut local_bound);
2940                free_vars(&arm.body, &mut local_bound, out);
2941            }
2942        }
2943        a::CExpr::Block { statements, result } => {
2944            let mut local_bound = bound.clone();
2945            for s in statements { free_vars(s, &mut local_bound, out); }
2946            free_vars(result, &mut local_bound, out);
2947        }
2948        a::CExpr::Constructor { args, .. } => {
2949            for a in args { free_vars(a, bound, out); }
2950        }
2951        a::CExpr::RecordLit { fields } => {
2952            for f in fields { free_vars(&f.value, bound, out); }
2953        }
2954        a::CExpr::TupleLit { items } | a::CExpr::ListLit { items } => {
2955            for it in items { free_vars(it, bound, out); }
2956        }
2957        a::CExpr::FieldAccess { value, .. } => free_vars(value, bound, out),
2958        a::CExpr::Lambda { params, body, .. } => {
2959            let mut inner = bound.clone();
2960            for p in params { inner.insert(p.name.clone()); }
2961            free_vars(body, &mut inner, out);
2962        }
2963        a::CExpr::BinOp { lhs, rhs, .. } => {
2964            free_vars(lhs, bound, out);
2965            free_vars(rhs, bound, out);
2966        }
2967        a::CExpr::UnaryOp { expr, .. } => free_vars(expr, bound, out),
2968        a::CExpr::Return { value } => free_vars(value, bound, out),
2969    }
2970}
2971
2972fn pattern_binders(p: &a::Pattern, bound: &mut std::collections::HashSet<String>) {
2973    match p {
2974        a::Pattern::PWild | a::Pattern::PLiteral { .. } => {}
2975        a::Pattern::PVar { name } => { bound.insert(name.clone()); }
2976        a::Pattern::PConstructor { args, .. } => {
2977            for a in args { pattern_binders(a, bound); }
2978        }
2979        a::Pattern::PRecord { fields } => {
2980            for f in fields { pattern_binders(&f.pattern, bound); }
2981        }
2982        a::Pattern::PTuple { items } => {
2983            for it in items { pattern_binders(it, bound); }
2984        }
2985    }
2986}