Skip to main content

lex_bytecode/
op.rs

1//! Bytecode instruction set per spec §8.2.
2
3use serde::{Deserialize, Serialize};
4
5/// Constant pool entry.
6#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
7pub enum Const {
8    Int(i64),
9    Float(f64),
10    Bool(bool),
11    Str(String),
12    Bytes(Vec<u8>),
13    Unit,
14    /// A field name, used by MAKE_RECORD/GET_FIELD.
15    FieldName(String),
16    /// A variant tag, used by MAKE_VARIANT/TEST_VARIANT/GET_VARIANT.
17    VariantName(String),
18    /// An AST NodeId, attached to Call / EffectCall for trace keying (§10.1).
19    NodeId(String),
20}
21
22#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
23pub enum Op {
24    // stack manipulation
25    PushConst(u32),
26    Pop,
27    Dup,
28
29    // locals
30    LoadLocal(u16),
31    StoreLocal(u16),
32
33    // constructors / pattern matching
34    /// Builds a record by interning its field-name shape in
35    /// `Program.record_shapes` (#461). `shape_idx` indexes that
36    /// side-table; `field_count` is `shape.len()` cached inline so the
37    /// stack-effect verifier can compute its delta without needing a
38    /// `Program` reference. The VM pops `field_count` values off the
39    /// stack and pairs them with `Program.record_shapes[shape_idx]`.
40    ///
41    /// Externalizing the field-name vec is what lets `Op` be `Copy`,
42    /// which is the precondition for direct-threaded dispatch
43    /// (`code[pc]` becomes a register-sized read instead of an
44    /// every-step `Vec` clone).
45    MakeRecord { shape_idx: u32, field_count: u16 },
46    /// Stack-allocated record (#464). Same shape semantics as
47    /// `MakeRecord` — pops `field_count` field values and pairs them
48    /// with `Program.record_shapes[shape_idx]` — but the field values
49    /// are stored in the current frame's slab inside the VM's
50    /// `stack_record_arena`, not in a heap-allocated `IndexMap`. The
51    /// stack pushes a `Value::StackRecord` whose `slab_start` indexes
52    /// into the arena.
53    ///
54    /// Emitted by the compiler in place of `MakeRecord` at sites that
55    /// `escape::build_escape_index` proves do not escape the frame
56    /// (returned, captured, stored into another aggregate, or passed
57    /// to a call). Runtime fallback: when the frame's
58    /// `stack_record_budget_remaining` is exhausted, the op silently
59    /// degrades to the heap path (identical observable effect to
60    /// `MakeRecord`), so a single function can mix stack and heap
61    /// records without compile-time partitioning.
62    ///
63    /// `body_hash` stability (#222): canonical encoding decodes this
64    /// op back to the historical `{"MakeRecord":{"field_name_indices":
65    /// [...]}}` form, so closure identity is invariant under the
66    /// step-2 lowering.
67    AllocStackRecord { shape_idx: u32, field_count: u16 },
68    /// Request-scoped arena record (#463 slice 2a). Same shape semantics
69    /// as `MakeRecord` and `AllocStackRecord` — pops `field_count` field
70    /// values, pairs them with `Program.record_shapes[shape_idx]`, and
71    /// pushes a record handle — but the fields live in the VM's
72    /// **request-scoped** `arena_slab`, not the per-frame stack-record
73    /// arena, so the value can outlive the allocating frame as long as
74    /// the surrounding request scope (opened by
75    /// `EffectHandler::enter_request_scope`) is still active. The
76    /// resulting `Value::ArenaRecord` indexes the slab.
77    ///
78    /// Emitted (slice 2b) at sites `arena::build_arena_index` proves do
79    /// not escape the request scope. Runtime fallback: when **no** scope
80    /// is active (e.g. a non-handler context calls a function that was
81    /// compiled with arena lowering), the op silently degrades to the
82    /// `MakeRecord` heap path — identical observable effect.
83    ///
84    /// `body_hash` stability (#222): canonical encoding decodes back to
85    /// `{"MakeRecord":{"field_name_indices":[...]}}`, so closure
86    /// identity is invariant under the lowering, mirroring
87    /// `AllocStackRecord`.
88    AllocArenaRecord { shape_idx: u32, field_count: u16 },
89    MakeTuple(u16),
90    /// Frame-local tuple (#464 tuple codegen). Stack-alloc analogue of
91    /// `MakeTuple`: pops `arity` values into the VM's stack-record
92    /// arena and pushes a `Value::StackTuple` whose `slab_start`
93    /// indexes the arena. Emitted by the compiler in place of
94    /// `MakeTuple` at sites `escape::build_escape_index` proves do not
95    /// escape the frame. Runtime fallback to the heap `Value::Tuple`
96    /// path when the frame's stack-record budget is exhausted —
97    /// identical observable effect, so stack and heap tuples can mix
98    /// within one function. `body_hash` stability (#222): canonical
99    /// encoding decodes this op back to `MakeTuple(arity)`, so closure
100    /// identity is invariant under the lowering.
101    AllocStackTuple { arity: u16 },
102    /// Request-scoped arena tuple (#463 slice 2a). Tuple analogue of
103    /// `AllocArenaRecord`: pops `arity` values into the VM's
104    /// request-scoped `arena_slab` and pushes a `Value::ArenaTuple`
105    /// handle. Same fallback rule (no active scope → `MakeTuple` heap
106    /// path) and same `body_hash` invariance (decodes back to
107    /// `MakeTuple(arity)`).
108    AllocArenaTuple { arity: u16 },
109    MakeList(u32),
110    MakeVariant { name_idx: u32, arity: u16 },
111    /// Record field access. `name_idx` indexes a `Const::FieldName`
112    /// in the constant pool — the field to read. `site_idx` is a
113    /// stable per-function index assigned by the compiler at emit
114    /// time (#462 slice 1), keyed into the per-fn inline-cache table
115    /// in the VM. Replaces the pre-#462 `(fn_id << 32 | pc)` IC key
116    /// so the cache survives the future dispatch rewrite (#461) and
117    /// a JIT (#465). `body_hash` stability: the canonical encoding
118    /// drops `site_idx` and serializes as the historical `GetField(u32)`
119    /// tuple form, so closure identity (#222) is unchanged.
120    GetField { name_idx: u32, site_idx: u32 },
121    GetElem(u16),          // tuple element index
122    TestVariant(u32),      // pushes Bool: top-of-stack matches variant name?
123    GetVariant(u32),       // extracts payload (replaces variant on stack with its args list)
124    GetVariantArg(u16),    // pop variant, push its i'th arg
125    GetListLen,
126    GetListElem(u32),
127    /// Pop [list, value]; push list with `value` appended.
128    ListAppend,
129    /// Pop list; push it indexed by the integer on top.
130    /// Stack: [list, idx] → [list[idx]]. (Like GetListElem(u32) but
131    /// the index is dynamic.)
132    GetListElemDyn,
133
134    // control flow
135    Jump(i32),
136    JumpIf(i32),     // pops Bool
137    JumpIfNot(i32),
138    Call { fn_id: u32, arity: u16, node_id_idx: u32 },
139    TailCall { fn_id: u32, arity: u16, node_id_idx: u32 },
140    /// Build a Value::Closure: pop `capture_count` values (in order) and
141    /// pair them with `fn_id`.
142    MakeClosure { fn_id: u32, capture_count: u16 },
143    /// Call a closure: pop `arity` args + 1 closure (top of stack), invoke.
144    CallClosure { arity: u16, node_id_idx: u32 },
145    /// Stable sort-by-key (#338). Stack: `[xs, f]` (xs underneath).
146    /// Pops the key-fn `f` and the list `xs`, applies `f` to each
147    /// element to derive a sortable key, returns the list reordered
148    /// so keys ascend. Keys must be one of `Int` / `Float` / `Str`;
149    /// other key types pair-wise compare as equal (preserving
150    /// insertion order). `node_id_idx` is the originating NodeId.
151    SortByKey { node_id_idx: u32 },
152    /// Parallel map (#305 slice 1). Stack: `[xs, f]` (xs underneath).
153    /// Pops the closure `f` and the list `xs`, applies `f` to each
154    /// element in parallel via OS threads, pushes the result list
155    /// in input order. `node_id_idx` is the originating NodeId for
156    /// trace keying. The pool size is capped by
157    /// `LEX_PAR_MAX_CONCURRENCY` (default = available CPU cores).
158    ///
159    /// Slice 1 limitation: closures invoking effects fail at
160    /// runtime with `VmError::Effect`. The per-thread effect handler
161    /// split is queued as slice 2.
162    ParallelMap { node_id_idx: u32 },
163    /// Map a list (#464 list-builder fast path). Stack: `[xs, f]` (xs
164    /// underneath). Pops the closure `f` and list `xs`, applies `f` to
165    /// each element, pushes the result list. Native opcode (mirrors
166    /// `SortByKey`/`ParallelMap`) rather than an inlined bytecode loop:
167    /// the loop form re-`LoadLocal`'d (cloned) the whole input and
168    /// accumulator lists each iteration — O(n²) — whereas the VM here
169    /// owns `xs` and builds the output with one pre-sized allocation.
170    ListMap { node_id_idx: u32 },
171    /// Filter a list (#464). Stack: `[xs, pred]`. Pops `pred` and `xs`,
172    /// keeps the elements for which `pred(x)` is true. Native, same
173    /// rationale as `ListMap`.
174    ListFilter { node_id_idx: u32 },
175    /// Left-fold a list (#464). Stack: `[xs, init, f]` (xs deepest).
176    /// Pops `f`, `init`, `xs`; threads `acc = f(acc, x)` from `init`
177    /// over the elements; pushes the final `acc`. Native, same
178    /// rationale as `ListMap`.
179    ListFold { node_id_idx: u32 },
180    /// EFFECT_CALL `<effect_kind_const_idx>` `<op_name_const_idx>` `<arity>`.
181    /// Pops `arity` args, dispatches to a host effect handler, pushes result.
182    /// `node_id_idx` points to a `Const::NodeId` for trace keying.
183    EffectCall { kind_idx: u32, op_idx: u32, arity: u16, node_id_idx: u32 },
184    Return,
185    Panic(u32),     // pushes constant message and aborts
186
187    // arithmetic — typed (per spec §8.2). `NumAdd`/etc. dispatch on operand
188    // type at runtime; emitted when the compiler doesn't have type info.
189    // The post-M5 plan is to lower NumAdd → IntAdd|FloatAdd in a typed pass.
190    IntAdd, IntSub, IntMul, IntDiv, IntMod, IntNeg,
191    IntEq, IntLt, IntLe,
192    FloatAdd, FloatSub, FloatMul, FloatDiv, FloatNeg,
193    FloatEq, FloatLt, FloatLe,
194    NumAdd, NumSub, NumMul, NumDiv, NumMod, NumNeg,
195    NumEq, NumLt, NumLe,
196    BoolAnd, BoolOr, BoolNot,
197
198    // strings
199    StrConcat, StrLen, StrEq,
200    BytesLen, BytesEq,
201
202    // superinstructions (#461)
203    //
204    // Fused opcodes emitted by the compiler's peephole pass to skip
205    // dispatch on common multi-op patterns. The pass leaves the
206    // original primitive ops in place at the trailing slots — the
207    // dispatch loop overrides its default `pc += 1` to step past
208    // them. Keeping `code.len()` invariant means existing
209    // Jump/JumpIf offsets remain valid without a renumbering pass.
210    /// Fused `LoadLocal(local_idx) + PushConst(imm_const_idx) +
211    /// IntAdd`. `imm_const_idx` must point to a `Const::Int`. The
212    /// dispatch arm reads the local, adds the constant, pushes the
213    /// result, and advances pc by 3 (past this op and the two
214    /// inert PushConst + IntAdd slots that follow). For
215    /// `body_hash` stability (#222) the canonical encoding decomposes
216    /// this op back to a standalone `LoadLocal(local_idx)` at hash
217    /// time; the unchanged PushConst / IntAdd at the next two
218    /// slots hash normally, so the total bytes match pre-fusion.
219    LoadLocalAddIntConst { local_idx: u16, imm_const_idx: u32 },
220    /// Fused `LoadLocal(src) + PushConst(imm_const_idx) + IntAdd +
221    /// StoreLocal(dest)` (#461 superinstruction slice 2). Bypasses
222    /// the value stack entirely: reads `locals[src]`, adds the Int
223    /// constant, writes `locals[dest]`. Advances pc by 4. Stack
224    /// delta: 0.
225    ///
226    /// The peephole pass that emits this op runs *after* slice 1,
227    /// looking for `[LoadLocalAddIntConst, ., ., StoreLocal]` where
228    /// the middle two slots are slice-1 tombstones (the original
229    /// PushConst + IntAdd). The verifier and the body-hash decoder
230    /// both treat the 3 following slots as tombstones owned by
231    /// this op.
232    LoadLocalAddIntConstStoreLocal { src: u16, imm_const_idx: u32, dest: u16 },
233    /// Fused `LoadLocal(lhs_idx) + LoadLocal(rhs_idx) + IntAdd`
234    /// (#461 superinstruction slice 3). The binary-op-on-two-locals
235    /// idiom — fires on any `a + b` where both operands are
236    /// statically-typed `Int` locals (e.g. `acc + n` in tail-recursive
237    /// accumulator loops). Reads `locals[lhs_idx]` and `locals[rhs_idx]`,
238    /// pushes the sum, advances pc by 3. Stack delta: +1.
239    ///
240    /// `body_hash` stability (#222): canonical encoding decomposes
241    /// back to a standalone `LoadLocal(lhs_idx)`. The unchanged
242    /// `LoadLocal(rhs_idx)` + `IntAdd` tombstones at pc+1 and pc+2
243    /// hash normally, so the total bytes match the pre-fusion form.
244    /// Verifier walks the tombstones as if live: their deltas
245    /// (+1 LoadLocal, -1 IntAdd) cancel, matching the unfused depth
246    /// at pc+3.
247    LoadLocalAddLocal { lhs_idx: u16, rhs_idx: u16 },
248    /// Fused `LoadLocal(lhs_idx) + LoadLocal(rhs_idx) + IntSub`
249    /// (#461 superinstruction slice 4). Sibling of `LoadLocalAddLocal`
250    /// for the typed `Int` subtraction binop — fires on any `a - b`
251    /// where both operands are `Int` locals. Reads `locals[lhs_idx]`
252    /// and `locals[rhs_idx]`, pushes `lhs - rhs`, advances pc by 3.
253    /// Stack delta: +1. Tombstone + body-hash story matches
254    /// `LoadLocalAddLocal` exactly.
255    LoadLocalSubLocal { lhs_idx: u16, rhs_idx: u16 },
256    /// Fused `LoadLocal(lhs_idx) + LoadLocal(rhs_idx) + IntMul`
257    /// (#461 superinstruction slice 4). Sibling of `LoadLocalAddLocal`
258    /// for the typed `Int` multiplication binop. Same shape: reads
259    /// the two Int locals, pushes `lhs * rhs`, advances pc by 3.
260    /// Stack delta: +1. Tombstone + body-hash story matches
261    /// `LoadLocalAddLocal` exactly.
262    LoadLocalMulLocal { lhs_idx: u16, rhs_idx: u16 },
263    /// Fused `LoadLocal(local_idx) + PushConst(imm_const_idx) +
264    /// IntEq + JumpIfNot(offset)` (#461 superinstruction slice 5,
265    /// pattern-match arm-test idiom). Fires on every numeric
266    /// pattern arm test — `match n { 0 => acc; _ => recurse }` and
267    /// the cascade of integer-literal arms in any pattern match —
268    /// after `compile_pattern_test` lowers the historical NumEq to
269    /// IntEq for Int-literal patterns. Reads the local, compares
270    /// against the Int constant; if equal, advances pc by 4 (past
271    /// the 3 tombstones, into the arm body); if not equal, jumps
272    /// to `pc + 4 + jump_offset` (the JumpIfNot's original target —
273    /// the next arm test or the panic-no-match block). Stack
274    /// delta: 0 (original sequence had +1, +1, -1, -1).
275    ///
276    /// Jump-aware peephole — slice 5 is the first fusion that
277    /// absorbs a control-flow op. The verifier walks the fused op
278    /// with both fall-through and branch successors, skipping past
279    /// the trailing three tombstones (mirroring slice 2's 4-slot
280    /// pattern). `body_hash` decodes back to a standalone
281    /// `LoadLocal(local_idx)`; the trailing primitive ops stay in
282    /// the code stream as tombstones and hash normally — so
283    /// closure identity (#222) stays invariant.
284    LoadLocalEqIntConstJumpIfNot { local_idx: u16, imm_const_idx: u32, jump_offset: i32 },
285    /// Fused `LoadLocal(src) + StoreLocal(dst) +
286    /// LoadLocalEqIntConstJumpIfNot { local_idx: dst, ... }` (#461
287    /// superinstruction slice 6). Absorbs the match-scrutinee dance
288    /// — the `LoadLocal + StoreLocal` the compiler emits to bind the
289    /// match expression to a fresh local before each arm's pattern
290    /// test reads it back. Reads `locals[src]`, mirrors the original
291    /// `StoreLocal(dst)` by writing the same value into `locals[dst]`
292    /// (so the SECOND and later arm tests in the same match still
293    /// see the scrutinee at the expected slot), then compares against
294    /// the constant. Equal → advance pc by 6 (skip past the 5
295    /// tombstones — original StoreLocal + slice-5 fused op + slice-5's
296    /// 3 trailing tombstones). Not equal → jump to
297    /// `pc + 6 + jump_offset` (the original JumpIfNot's target;
298    /// `jump_offset` is copied unchanged from the slice-5 op).
299    /// Stack delta: 0.
300    ///
301    /// `body_hash` decodes back to a standalone `LoadLocal(src)`;
302    /// the trailing 5 ops (StoreLocal, the slice-5 fused op
303    /// decoded as LoadLocal(dst), PushConst, IntEq, JumpIfNot) stay
304    /// in the code stream as tombstones and hash normally.
305    LoadLocalStoreEqIntConstJumpIfNot {
306        src: u16, dst: u16, imm_const_idx: u32, jump_offset: i32,
307    },
308    /// Fused `LoadLocal(local_idx) + GetField{name_idx, site_idx} +
309    /// IntAdd` (#461 superinstruction slice 7). Fires on the
310    /// `acc + r.field` accumulator-with-field-read idiom — the
311    /// shape any `expr + record.field` lowers to when the LHS is
312    /// already on the stack and the RHS is a same-frame record
313    /// field. After #464 step 2 dropped the IndexMap allocation
314    /// from hot-path records, this fusion is the next dispatch-
315    /// overhead bottleneck on the `response_build` profile.
316    ///
317    /// Dispatch: pops the prior stack top (an Int), reads
318    /// `locals[local_idx]`, performs the polymorphic-IC GetField
319    /// lookup keyed by `(fn_id, site_idx)` against `name_idx`,
320    /// adds the field value to the popped Int, pushes the result,
321    /// advances pc by 3.
322    ///
323    /// Stack delta: +1 (matches a bare `LoadLocal`). The trailing
324    /// `GetField` (delta 0) and `IntAdd` (delta -1) stay in the
325    /// code stream as inert tombstones; the verifier walks them as
326    /// live and their cancelling deltas leave depth at pc+3
327    /// matching the unfused form.
328    ///
329    /// `body_hash` stability (#222): canonical encoding decomposes
330    /// to a standalone `LoadLocal(local_idx)`; the unchanged
331    /// `GetField` and `IntAdd` at pc+1 and pc+2 hash normally, so
332    /// the total bytes match pre-fusion. The trailing `GetField`'s
333    /// own body-hash decoding (which strips `site_idx`) means the
334    /// hash is unchanged across recompiles where IC-site numbering
335    /// differs.
336    ///
337    /// Safety: the trailing two slots must not be jump targets
338    /// (standard tombstone rule). The first slot may be a target —
339    /// the fused op there is live.
340    LoadLocalGetFieldAdd { local_idx: u16, name_idx: u32, site_idx: u32 },
341    /// Slice 8 of #461: `IntSub` / `IntMul` siblings of slice 7's
342    /// `LoadLocalGetFieldAdd`. Fuse `LoadLocal + GetField + IntSub`
343    /// and `LoadLocal + GetField + IntMul` respectively — the
344    /// `acc - r.field` and `acc * r.field` idioms. Same tombstone,
345    /// jump-safety, body-hash (decode to `LoadLocal(local_idx)`),
346    /// and verifier (+1 delta) story as slice 7.
347    ///
348    /// `IntSub` is not commutative: the unfused sequence leaves the
349    /// field value on top, so `IntSub`'s deeper-minus-top semantics
350    /// give `acc - field`. The fused dispatch preserves that order.
351    LoadLocalGetFieldSub { local_idx: u16, name_idx: u32, site_idx: u32 },
352    LoadLocalGetFieldMul { local_idx: u16, name_idx: u32, site_idx: u32 },
353    /// Slice 9 of #461: fuse `LoadLocal(local_idx) + GetField{name_idx,
354    /// site_idx}` — the bare `record.field` read, the single most
355    /// common field-access shape. Unlike slices 7/8 there's no
356    /// arithmetic terminator; this is a 2-op window.
357    ///
358    /// The win is allocation, not just dispatch: the unfused pair
359    /// `LoadLocal` clones the entire record onto the value stack
360    /// (a `Box<IndexMap>` for a heap record), `GetField` pops it,
361    /// reads one field, and drops the rest. The fused op reads the
362    /// field out of the local by reference (`read_local_record_field`)
363    /// and clones only that one value. On the `response_build`
364    /// profile the whole-record clone+drop of the returned `Response`
365    /// (`r.total`) was the dominant malloc source.
366    ///
367    /// Stack delta: +1 (LoadLocal +1, GetField 0). The trailing
368    /// `GetField` stays as a single inert tombstone (delta 0); the
369    /// verifier walks it, leaving depth at pc+2 matching the unfused
370    /// `[LoadLocal, GetField]` pair. `body_hash` decodes to a
371    /// standalone `LoadLocal(local_idx)`; the trailing `GetField`
372    /// hashes normally.
373    ///
374    /// Safety: the trailing slot (the original `GetField`) must not
375    /// be a jump target. The first slot may be.
376    LoadLocalGetField { local_idx: u16, name_idx: u32, site_idx: u32 },
377}