lex_bytecode/op.rs
1//! Bytecode instruction set per spec §8.2.
2
3use serde::{Deserialize, Serialize};
4
5/// Constant pool entry.
6#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
7pub enum Const {
8 Int(i64),
9 Float(f64),
10 Bool(bool),
11 Str(String),
12 Bytes(Vec<u8>),
13 Unit,
14 /// A field name, used by MAKE_RECORD/GET_FIELD.
15 FieldName(String),
16 /// A variant tag, used by MAKE_VARIANT/TEST_VARIANT/GET_VARIANT.
17 VariantName(String),
18 /// An AST NodeId, attached to Call / EffectCall for trace keying (§10.1).
19 NodeId(String),
20}
21
22#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
23pub enum Op {
24 // stack manipulation
25 PushConst(u32),
26 Pop,
27 Dup,
28
29 // locals
30 LoadLocal(u16),
31 StoreLocal(u16),
32
33 // constructors / pattern matching
34 /// Builds a record by interning its field-name shape in
35 /// `Program.record_shapes` (#461). `shape_idx` indexes that
36 /// side-table; `field_count` is `shape.len()` cached inline so the
37 /// stack-effect verifier can compute its delta without needing a
38 /// `Program` reference. The VM pops `field_count` values off the
39 /// stack and pairs them with `Program.record_shapes[shape_idx]`.
40 ///
41 /// Externalizing the field-name vec is what lets `Op` be `Copy`,
42 /// which is the precondition for direct-threaded dispatch
43 /// (`code[pc]` becomes a register-sized read instead of an
44 /// every-step `Vec` clone).
45 MakeRecord { shape_idx: u32, field_count: u16 },
46 /// Stack-allocated record (#464). Same shape semantics as
47 /// `MakeRecord` — pops `field_count` field values and pairs them
48 /// with `Program.record_shapes[shape_idx]` — but the field values
49 /// are stored in the current frame's slab inside the VM's
50 /// `stack_record_arena`, not in a heap-allocated `IndexMap`. The
51 /// stack pushes a `Value::StackRecord` whose `slab_start` indexes
52 /// into the arena.
53 ///
54 /// Emitted by the compiler in place of `MakeRecord` at sites that
55 /// `escape::build_escape_index` proves do not escape the frame
56 /// (returned, captured, stored into another aggregate, or passed
57 /// to a call). Runtime fallback: when the frame's
58 /// `stack_record_budget_remaining` is exhausted, the op silently
59 /// degrades to the heap path (identical observable effect to
60 /// `MakeRecord`), so a single function can mix stack and heap
61 /// records without compile-time partitioning.
62 ///
63 /// `body_hash` stability (#222): canonical encoding decodes this
64 /// op back to the historical `{"MakeRecord":{"field_name_indices":
65 /// [...]}}` form, so closure identity is invariant under the
66 /// step-2 lowering.
67 AllocStackRecord { shape_idx: u32, field_count: u16 },
68 /// Request-scoped arena record (#463 slice 2a). Same shape semantics
69 /// as `MakeRecord` and `AllocStackRecord` — pops `field_count` field
70 /// values, pairs them with `Program.record_shapes[shape_idx]`, and
71 /// pushes a record handle — but the fields live in the VM's
72 /// **request-scoped** `arena_slab`, not the per-frame stack-record
73 /// arena, so the value can outlive the allocating frame as long as
74 /// the surrounding request scope (opened by
75 /// `EffectHandler::enter_request_scope`) is still active. The
76 /// resulting `Value::ArenaRecord` indexes the slab.
77 ///
78 /// Emitted (slice 2b) at sites `arena::build_arena_index` proves do
79 /// not escape the request scope. Runtime fallback: when **no** scope
80 /// is active (e.g. a non-handler context calls a function that was
81 /// compiled with arena lowering), the op silently degrades to the
82 /// `MakeRecord` heap path — identical observable effect.
83 ///
84 /// `body_hash` stability (#222): canonical encoding decodes back to
85 /// `{"MakeRecord":{"field_name_indices":[...]}}`, so closure
86 /// identity is invariant under the lowering, mirroring
87 /// `AllocStackRecord`.
88 AllocArenaRecord { shape_idx: u32, field_count: u16 },
89 MakeTuple(u16),
90 /// Frame-local tuple (#464 tuple codegen). Stack-alloc analogue of
91 /// `MakeTuple`: pops `arity` values into the VM's stack-record
92 /// arena and pushes a `Value::StackTuple` whose `slab_start`
93 /// indexes the arena. Emitted by the compiler in place of
94 /// `MakeTuple` at sites `escape::build_escape_index` proves do not
95 /// escape the frame. Runtime fallback to the heap `Value::Tuple`
96 /// path when the frame's stack-record budget is exhausted —
97 /// identical observable effect, so stack and heap tuples can mix
98 /// within one function. `body_hash` stability (#222): canonical
99 /// encoding decodes this op back to `MakeTuple(arity)`, so closure
100 /// identity is invariant under the lowering.
101 AllocStackTuple { arity: u16 },
102 /// Request-scoped arena tuple (#463 slice 2a). Tuple analogue of
103 /// `AllocArenaRecord`: pops `arity` values into the VM's
104 /// request-scoped `arena_slab` and pushes a `Value::ArenaTuple`
105 /// handle. Same fallback rule (no active scope → `MakeTuple` heap
106 /// path) and same `body_hash` invariance (decodes back to
107 /// `MakeTuple(arity)`).
108 AllocArenaTuple { arity: u16 },
109 MakeList(u32),
110 MakeVariant { name_idx: u32, arity: u16 },
111 /// Record field access. `name_idx` indexes a `Const::FieldName`
112 /// in the constant pool — the field to read. `site_idx` is a
113 /// stable per-function index assigned by the compiler at emit
114 /// time (#462 slice 1), keyed into the per-fn inline-cache table
115 /// in the VM. Replaces the pre-#462 `(fn_id << 32 | pc)` IC key
116 /// so the cache survives the future dispatch rewrite (#461) and
117 /// a JIT (#465). `body_hash` stability: the canonical encoding
118 /// drops `site_idx` and serializes as the historical `GetField(u32)`
119 /// tuple form, so closure identity (#222) is unchanged.
120 GetField { name_idx: u32, site_idx: u32 },
121 GetElem(u16), // tuple element index
122 TestVariant(u32), // pushes Bool: top-of-stack matches variant name?
123 GetVariant(u32), // extracts payload (replaces variant on stack with its args list)
124 GetVariantArg(u16), // pop variant, push its i'th arg
125 GetListLen,
126 GetListElem(u32),
127 /// Pop [list, value]; push list with `value` appended.
128 ListAppend,
129 /// Pop list; push it indexed by the integer on top.
130 /// Stack: [list, idx] → [list[idx]]. (Like GetListElem(u32) but
131 /// the index is dynamic.)
132 GetListElemDyn,
133
134 // control flow
135 Jump(i32),
136 JumpIf(i32), // pops Bool
137 JumpIfNot(i32),
138 Call { fn_id: u32, arity: u16, node_id_idx: u32 },
139 TailCall { fn_id: u32, arity: u16, node_id_idx: u32 },
140 /// Build a Value::Closure: pop `capture_count` values (in order) and
141 /// pair them with `fn_id`.
142 MakeClosure { fn_id: u32, capture_count: u16 },
143 /// Call a closure: pop `arity` args + 1 closure (top of stack), invoke.
144 CallClosure { arity: u16, node_id_idx: u32 },
145 /// Stable sort-by-key (#338). Stack: `[xs, f]` (xs underneath).
146 /// Pops the key-fn `f` and the list `xs`, applies `f` to each
147 /// element to derive a sortable key, returns the list reordered
148 /// so keys ascend. Keys must be one of `Int` / `Float` / `Str`;
149 /// other key types pair-wise compare as equal (preserving
150 /// insertion order). `node_id_idx` is the originating NodeId.
151 SortByKey { node_id_idx: u32 },
152 /// Parallel map (#305 slice 1). Stack: `[xs, f]` (xs underneath).
153 /// Pops the closure `f` and the list `xs`, applies `f` to each
154 /// element in parallel via OS threads, pushes the result list
155 /// in input order. `node_id_idx` is the originating NodeId for
156 /// trace keying. The pool size is capped by
157 /// `LEX_PAR_MAX_CONCURRENCY` (default = available CPU cores).
158 ///
159 /// Slice 1 limitation: closures invoking effects fail at
160 /// runtime with `VmError::Effect`. The per-thread effect handler
161 /// split is queued as slice 2.
162 ParallelMap { node_id_idx: u32 },
163 /// Map a list (#464 list-builder fast path). Stack: `[xs, f]` (xs
164 /// underneath). Pops the closure `f` and list `xs`, applies `f` to
165 /// each element, pushes the result list. Native opcode (mirrors
166 /// `SortByKey`/`ParallelMap`) rather than an inlined bytecode loop:
167 /// the loop form re-`LoadLocal`'d (cloned) the whole input and
168 /// accumulator lists each iteration — O(n²) — whereas the VM here
169 /// owns `xs` and builds the output with one pre-sized allocation.
170 ListMap { node_id_idx: u32 },
171 /// Filter a list (#464). Stack: `[xs, pred]`. Pops `pred` and `xs`,
172 /// keeps the elements for which `pred(x)` is true. Native, same
173 /// rationale as `ListMap`.
174 ListFilter { node_id_idx: u32 },
175 /// Left-fold a list (#464). Stack: `[xs, init, f]` (xs deepest).
176 /// Pops `f`, `init`, `xs`; threads `acc = f(acc, x)` from `init`
177 /// over the elements; pushes the final `acc`. Native, same
178 /// rationale as `ListMap`.
179 ListFold { node_id_idx: u32 },
180 /// EFFECT_CALL `<effect_kind_const_idx>` `<op_name_const_idx>` `<arity>`.
181 /// Pops `arity` args, dispatches to a host effect handler, pushes result.
182 /// `node_id_idx` points to a `Const::NodeId` for trace keying.
183 EffectCall { kind_idx: u32, op_idx: u32, arity: u16, node_id_idx: u32 },
184 Return,
185 Panic(u32), // pushes constant message and aborts
186
187 // arithmetic — typed (per spec §8.2). `NumAdd`/etc. dispatch on operand
188 // type at runtime; emitted when the compiler doesn't have type info.
189 // The post-M5 plan is to lower NumAdd → IntAdd|FloatAdd in a typed pass.
190 IntAdd, IntSub, IntMul, IntDiv, IntMod, IntNeg,
191 IntEq, IntLt, IntLe,
192 FloatAdd, FloatSub, FloatMul, FloatDiv, FloatNeg,
193 FloatEq, FloatLt, FloatLe,
194 NumAdd, NumSub, NumMul, NumDiv, NumMod, NumNeg,
195 NumEq, NumLt, NumLe,
196 BoolAnd, BoolOr, BoolNot,
197
198 // strings
199 StrConcat, StrLen, StrEq,
200 BytesLen, BytesEq,
201
202 // superinstructions (#461)
203 //
204 // Fused opcodes emitted by the compiler's peephole pass to skip
205 // dispatch on common multi-op patterns. The pass leaves the
206 // original primitive ops in place at the trailing slots — the
207 // dispatch loop overrides its default `pc += 1` to step past
208 // them. Keeping `code.len()` invariant means existing
209 // Jump/JumpIf offsets remain valid without a renumbering pass.
210 /// Fused `LoadLocal(local_idx) + PushConst(imm_const_idx) +
211 /// IntAdd`. `imm_const_idx` must point to a `Const::Int`. The
212 /// dispatch arm reads the local, adds the constant, pushes the
213 /// result, and advances pc by 3 (past this op and the two
214 /// inert PushConst + IntAdd slots that follow). For
215 /// `body_hash` stability (#222) the canonical encoding decomposes
216 /// this op back to a standalone `LoadLocal(local_idx)` at hash
217 /// time; the unchanged PushConst / IntAdd at the next two
218 /// slots hash normally, so the total bytes match pre-fusion.
219 LoadLocalAddIntConst { local_idx: u16, imm_const_idx: u32 },
220 /// Fused `LoadLocal(src) + PushConst(imm_const_idx) + IntAdd +
221 /// StoreLocal(dest)` (#461 superinstruction slice 2). Bypasses
222 /// the value stack entirely: reads `locals[src]`, adds the Int
223 /// constant, writes `locals[dest]`. Advances pc by 4. Stack
224 /// delta: 0.
225 ///
226 /// The peephole pass that emits this op runs *after* slice 1,
227 /// looking for `[LoadLocalAddIntConst, ., ., StoreLocal]` where
228 /// the middle two slots are slice-1 tombstones (the original
229 /// PushConst + IntAdd). The verifier and the body-hash decoder
230 /// both treat the 3 following slots as tombstones owned by
231 /// this op.
232 LoadLocalAddIntConstStoreLocal { src: u16, imm_const_idx: u32, dest: u16 },
233 /// Fused `LoadLocal(lhs_idx) + LoadLocal(rhs_idx) + IntAdd`
234 /// (#461 superinstruction slice 3). The binary-op-on-two-locals
235 /// idiom — fires on any `a + b` where both operands are
236 /// statically-typed `Int` locals (e.g. `acc + n` in tail-recursive
237 /// accumulator loops). Reads `locals[lhs_idx]` and `locals[rhs_idx]`,
238 /// pushes the sum, advances pc by 3. Stack delta: +1.
239 ///
240 /// `body_hash` stability (#222): canonical encoding decomposes
241 /// back to a standalone `LoadLocal(lhs_idx)`. The unchanged
242 /// `LoadLocal(rhs_idx)` + `IntAdd` tombstones at pc+1 and pc+2
243 /// hash normally, so the total bytes match the pre-fusion form.
244 /// Verifier walks the tombstones as if live: their deltas
245 /// (+1 LoadLocal, -1 IntAdd) cancel, matching the unfused depth
246 /// at pc+3.
247 LoadLocalAddLocal { lhs_idx: u16, rhs_idx: u16 },
248 /// Fused `LoadLocal(lhs_idx) + LoadLocal(rhs_idx) + IntSub`
249 /// (#461 superinstruction slice 4). Sibling of `LoadLocalAddLocal`
250 /// for the typed `Int` subtraction binop — fires on any `a - b`
251 /// where both operands are `Int` locals. Reads `locals[lhs_idx]`
252 /// and `locals[rhs_idx]`, pushes `lhs - rhs`, advances pc by 3.
253 /// Stack delta: +1. Tombstone + body-hash story matches
254 /// `LoadLocalAddLocal` exactly.
255 LoadLocalSubLocal { lhs_idx: u16, rhs_idx: u16 },
256 /// Fused `LoadLocal(lhs_idx) + LoadLocal(rhs_idx) + IntMul`
257 /// (#461 superinstruction slice 4). Sibling of `LoadLocalAddLocal`
258 /// for the typed `Int` multiplication binop. Same shape: reads
259 /// the two Int locals, pushes `lhs * rhs`, advances pc by 3.
260 /// Stack delta: +1. Tombstone + body-hash story matches
261 /// `LoadLocalAddLocal` exactly.
262 LoadLocalMulLocal { lhs_idx: u16, rhs_idx: u16 },
263 /// Fused `LoadLocal(local_idx) + PushConst(imm_const_idx) +
264 /// IntEq + JumpIfNot(offset)` (#461 superinstruction slice 5,
265 /// pattern-match arm-test idiom). Fires on every numeric
266 /// pattern arm test — `match n { 0 => acc; _ => recurse }` and
267 /// the cascade of integer-literal arms in any pattern match —
268 /// after `compile_pattern_test` lowers the historical NumEq to
269 /// IntEq for Int-literal patterns. Reads the local, compares
270 /// against the Int constant; if equal, advances pc by 4 (past
271 /// the 3 tombstones, into the arm body); if not equal, jumps
272 /// to `pc + 4 + jump_offset` (the JumpIfNot's original target —
273 /// the next arm test or the panic-no-match block). Stack
274 /// delta: 0 (original sequence had +1, +1, -1, -1).
275 ///
276 /// Jump-aware peephole — slice 5 is the first fusion that
277 /// absorbs a control-flow op. The verifier walks the fused op
278 /// with both fall-through and branch successors, skipping past
279 /// the trailing three tombstones (mirroring slice 2's 4-slot
280 /// pattern). `body_hash` decodes back to a standalone
281 /// `LoadLocal(local_idx)`; the trailing primitive ops stay in
282 /// the code stream as tombstones and hash normally — so
283 /// closure identity (#222) stays invariant.
284 LoadLocalEqIntConstJumpIfNot { local_idx: u16, imm_const_idx: u32, jump_offset: i32 },
285 /// Fused `LoadLocal(src) + StoreLocal(dst) +
286 /// LoadLocalEqIntConstJumpIfNot { local_idx: dst, ... }` (#461
287 /// superinstruction slice 6). Absorbs the match-scrutinee dance
288 /// — the `LoadLocal + StoreLocal` the compiler emits to bind the
289 /// match expression to a fresh local before each arm's pattern
290 /// test reads it back. Reads `locals[src]`, mirrors the original
291 /// `StoreLocal(dst)` by writing the same value into `locals[dst]`
292 /// (so the SECOND and later arm tests in the same match still
293 /// see the scrutinee at the expected slot), then compares against
294 /// the constant. Equal → advance pc by 6 (skip past the 5
295 /// tombstones — original StoreLocal + slice-5 fused op + slice-5's
296 /// 3 trailing tombstones). Not equal → jump to
297 /// `pc + 6 + jump_offset` (the original JumpIfNot's target;
298 /// `jump_offset` is copied unchanged from the slice-5 op).
299 /// Stack delta: 0.
300 ///
301 /// `body_hash` decodes back to a standalone `LoadLocal(src)`;
302 /// the trailing 5 ops (StoreLocal, the slice-5 fused op
303 /// decoded as LoadLocal(dst), PushConst, IntEq, JumpIfNot) stay
304 /// in the code stream as tombstones and hash normally.
305 LoadLocalStoreEqIntConstJumpIfNot {
306 src: u16, dst: u16, imm_const_idx: u32, jump_offset: i32,
307 },
308 /// Fused `LoadLocal(local_idx) + GetField{name_idx, site_idx} +
309 /// IntAdd` (#461 superinstruction slice 7). Fires on the
310 /// `acc + r.field` accumulator-with-field-read idiom — the
311 /// shape any `expr + record.field` lowers to when the LHS is
312 /// already on the stack and the RHS is a same-frame record
313 /// field. After #464 step 2 dropped the IndexMap allocation
314 /// from hot-path records, this fusion is the next dispatch-
315 /// overhead bottleneck on the `response_build` profile.
316 ///
317 /// Dispatch: pops the prior stack top (an Int), reads
318 /// `locals[local_idx]`, performs the polymorphic-IC GetField
319 /// lookup keyed by `(fn_id, site_idx)` against `name_idx`,
320 /// adds the field value to the popped Int, pushes the result,
321 /// advances pc by 3.
322 ///
323 /// Stack delta: +1 (matches a bare `LoadLocal`). The trailing
324 /// `GetField` (delta 0) and `IntAdd` (delta -1) stay in the
325 /// code stream as inert tombstones; the verifier walks them as
326 /// live and their cancelling deltas leave depth at pc+3
327 /// matching the unfused form.
328 ///
329 /// `body_hash` stability (#222): canonical encoding decomposes
330 /// to a standalone `LoadLocal(local_idx)`; the unchanged
331 /// `GetField` and `IntAdd` at pc+1 and pc+2 hash normally, so
332 /// the total bytes match pre-fusion. The trailing `GetField`'s
333 /// own body-hash decoding (which strips `site_idx`) means the
334 /// hash is unchanged across recompiles where IC-site numbering
335 /// differs.
336 ///
337 /// Safety: the trailing two slots must not be jump targets
338 /// (standard tombstone rule). The first slot may be a target —
339 /// the fused op there is live.
340 LoadLocalGetFieldAdd { local_idx: u16, name_idx: u32, site_idx: u32 },
341 /// Slice 8 of #461: `IntSub` / `IntMul` siblings of slice 7's
342 /// `LoadLocalGetFieldAdd`. Fuse `LoadLocal + GetField + IntSub`
343 /// and `LoadLocal + GetField + IntMul` respectively — the
344 /// `acc - r.field` and `acc * r.field` idioms. Same tombstone,
345 /// jump-safety, body-hash (decode to `LoadLocal(local_idx)`),
346 /// and verifier (+1 delta) story as slice 7.
347 ///
348 /// `IntSub` is not commutative: the unfused sequence leaves the
349 /// field value on top, so `IntSub`'s deeper-minus-top semantics
350 /// give `acc - field`. The fused dispatch preserves that order.
351 LoadLocalGetFieldSub { local_idx: u16, name_idx: u32, site_idx: u32 },
352 LoadLocalGetFieldMul { local_idx: u16, name_idx: u32, site_idx: u32 },
353 /// Slice 9 of #461: fuse `LoadLocal(local_idx) + GetField{name_idx,
354 /// site_idx}` — the bare `record.field` read, the single most
355 /// common field-access shape. Unlike slices 7/8 there's no
356 /// arithmetic terminator; this is a 2-op window.
357 ///
358 /// The win is allocation, not just dispatch: the unfused pair
359 /// `LoadLocal` clones the entire record onto the value stack
360 /// (a `Box<IndexMap>` for a heap record), `GetField` pops it,
361 /// reads one field, and drops the rest. The fused op reads the
362 /// field out of the local by reference (`read_local_record_field`)
363 /// and clones only that one value. On the `response_build`
364 /// profile the whole-record clone+drop of the returned `Response`
365 /// (`r.total`) was the dominant malloc source.
366 ///
367 /// Stack delta: +1 (LoadLocal +1, GetField 0). The trailing
368 /// `GetField` stays as a single inert tombstone (delta 0); the
369 /// verifier walks it, leaving depth at pc+2 matching the unfused
370 /// `[LoadLocal, GetField]` pair. `body_hash` decodes to a
371 /// standalone `LoadLocal(local_idx)`; the trailing `GetField`
372 /// hashes normally.
373 ///
374 /// Safety: the trailing slot (the original `GetField`) must not
375 /// be a jump target. The first slot may be.
376 LoadLocalGetField { local_idx: u16, name_idx: u32, site_idx: u32 },
377}