lex_bytecode/op.rs
1//! Bytecode instruction set per spec §8.2.
2
3use serde::{Deserialize, Serialize};
4
5/// Constant pool entry.
6#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
7pub enum Const {
8 Int(i64),
9 Float(f64),
10 Bool(bool),
11 Str(String),
12 Bytes(Vec<u8>),
13 Unit,
14 /// A field name, used by MAKE_RECORD/GET_FIELD.
15 FieldName(String),
16 /// A variant tag, used by MAKE_VARIANT/TEST_VARIANT/GET_VARIANT.
17 VariantName(String),
18 /// An AST NodeId, attached to Call / EffectCall for trace keying (§10.1).
19 NodeId(String),
20}
21
22#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
23pub enum Op {
24 // stack manipulation
25 PushConst(u32),
26 Pop,
27 Dup,
28
29 // locals
30 LoadLocal(u16),
31 StoreLocal(u16),
32
33 // constructors / pattern matching
34 /// Builds a record by interning its field-name shape in
35 /// `Program.record_shapes` (#461). `shape_idx` indexes that
36 /// side-table; `field_count` is `shape.len()` cached inline so the
37 /// stack-effect verifier can compute its delta without needing a
38 /// `Program` reference. The VM pops `field_count` values off the
39 /// stack and pairs them with `Program.record_shapes[shape_idx]`.
40 ///
41 /// Externalizing the field-name vec is what lets `Op` be `Copy`,
42 /// which is the precondition for direct-threaded dispatch
43 /// (`code[pc]` becomes a register-sized read instead of an
44 /// every-step `Vec` clone).
45 MakeRecord { shape_idx: u32, field_count: u16 },
46 /// Stack-allocated record (#464). Same shape semantics as
47 /// `MakeRecord` — pops `field_count` field values and pairs them
48 /// with `Program.record_shapes[shape_idx]` — but the field values
49 /// are stored in the current frame's slab inside the VM's
50 /// `stack_record_arena`, not in a heap-allocated `IndexMap`. The
51 /// stack pushes a `Value::StackRecord` whose `slab_start` indexes
52 /// into the arena.
53 ///
54 /// Emitted by the compiler in place of `MakeRecord` at sites that
55 /// `escape::build_escape_index` proves do not escape the frame
56 /// (returned, captured, stored into another aggregate, or passed
57 /// to a call). Runtime fallback: when the frame's
58 /// `stack_record_budget_remaining` is exhausted, the op silently
59 /// degrades to the heap path (identical observable effect to
60 /// `MakeRecord`), so a single function can mix stack and heap
61 /// records without compile-time partitioning.
62 ///
63 /// `body_hash` stability (#222): canonical encoding decodes this
64 /// op back to the historical `{"MakeRecord":{"field_name_indices":
65 /// [...]}}` form, so closure identity is invariant under the
66 /// step-2 lowering.
67 AllocStackRecord { shape_idx: u32, field_count: u16 },
68 MakeTuple(u16),
69 /// Frame-local tuple (#464 tuple codegen). Stack-alloc analogue of
70 /// `MakeTuple`: pops `arity` values into the VM's stack-record
71 /// arena and pushes a `Value::StackTuple` whose `slab_start`
72 /// indexes the arena. Emitted by the compiler in place of
73 /// `MakeTuple` at sites `escape::build_escape_index` proves do not
74 /// escape the frame. Runtime fallback to the heap `Value::Tuple`
75 /// path when the frame's stack-record budget is exhausted —
76 /// identical observable effect, so stack and heap tuples can mix
77 /// within one function. `body_hash` stability (#222): canonical
78 /// encoding decodes this op back to `MakeTuple(arity)`, so closure
79 /// identity is invariant under the lowering.
80 AllocStackTuple { arity: u16 },
81 MakeList(u32),
82 MakeVariant { name_idx: u32, arity: u16 },
83 /// Record field access. `name_idx` indexes a `Const::FieldName`
84 /// in the constant pool — the field to read. `site_idx` is a
85 /// stable per-function index assigned by the compiler at emit
86 /// time (#462 slice 1), keyed into the per-fn inline-cache table
87 /// in the VM. Replaces the pre-#462 `(fn_id << 32 | pc)` IC key
88 /// so the cache survives the future dispatch rewrite (#461) and
89 /// a JIT (#465). `body_hash` stability: the canonical encoding
90 /// drops `site_idx` and serializes as the historical `GetField(u32)`
91 /// tuple form, so closure identity (#222) is unchanged.
92 GetField { name_idx: u32, site_idx: u32 },
93 GetElem(u16), // tuple element index
94 TestVariant(u32), // pushes Bool: top-of-stack matches variant name?
95 GetVariant(u32), // extracts payload (replaces variant on stack with its args list)
96 GetVariantArg(u16), // pop variant, push its i'th arg
97 GetListLen,
98 GetListElem(u32),
99 /// Pop [list, value]; push list with `value` appended.
100 ListAppend,
101 /// Pop list; push it indexed by the integer on top.
102 /// Stack: [list, idx] → [list[idx]]. (Like GetListElem(u32) but
103 /// the index is dynamic.)
104 GetListElemDyn,
105
106 // control flow
107 Jump(i32),
108 JumpIf(i32), // pops Bool
109 JumpIfNot(i32),
110 Call { fn_id: u32, arity: u16, node_id_idx: u32 },
111 TailCall { fn_id: u32, arity: u16, node_id_idx: u32 },
112 /// Build a Value::Closure: pop `capture_count` values (in order) and
113 /// pair them with `fn_id`.
114 MakeClosure { fn_id: u32, capture_count: u16 },
115 /// Call a closure: pop `arity` args + 1 closure (top of stack), invoke.
116 CallClosure { arity: u16, node_id_idx: u32 },
117 /// Stable sort-by-key (#338). Stack: `[xs, f]` (xs underneath).
118 /// Pops the key-fn `f` and the list `xs`, applies `f` to each
119 /// element to derive a sortable key, returns the list reordered
120 /// so keys ascend. Keys must be one of `Int` / `Float` / `Str`;
121 /// other key types pair-wise compare as equal (preserving
122 /// insertion order). `node_id_idx` is the originating NodeId.
123 SortByKey { node_id_idx: u32 },
124 /// Parallel map (#305 slice 1). Stack: `[xs, f]` (xs underneath).
125 /// Pops the closure `f` and the list `xs`, applies `f` to each
126 /// element in parallel via OS threads, pushes the result list
127 /// in input order. `node_id_idx` is the originating NodeId for
128 /// trace keying. The pool size is capped by
129 /// `LEX_PAR_MAX_CONCURRENCY` (default = available CPU cores).
130 ///
131 /// Slice 1 limitation: closures invoking effects fail at
132 /// runtime with `VmError::Effect`. The per-thread effect handler
133 /// split is queued as slice 2.
134 ParallelMap { node_id_idx: u32 },
135 /// Map a list (#464 list-builder fast path). Stack: `[xs, f]` (xs
136 /// underneath). Pops the closure `f` and list `xs`, applies `f` to
137 /// each element, pushes the result list. Native opcode (mirrors
138 /// `SortByKey`/`ParallelMap`) rather than an inlined bytecode loop:
139 /// the loop form re-`LoadLocal`'d (cloned) the whole input and
140 /// accumulator lists each iteration — O(n²) — whereas the VM here
141 /// owns `xs` and builds the output with one pre-sized allocation.
142 ListMap { node_id_idx: u32 },
143 /// Filter a list (#464). Stack: `[xs, pred]`. Pops `pred` and `xs`,
144 /// keeps the elements for which `pred(x)` is true. Native, same
145 /// rationale as `ListMap`.
146 ListFilter { node_id_idx: u32 },
147 /// Left-fold a list (#464). Stack: `[xs, init, f]` (xs deepest).
148 /// Pops `f`, `init`, `xs`; threads `acc = f(acc, x)` from `init`
149 /// over the elements; pushes the final `acc`. Native, same
150 /// rationale as `ListMap`.
151 ListFold { node_id_idx: u32 },
152 /// EFFECT_CALL `<effect_kind_const_idx>` `<op_name_const_idx>` `<arity>`.
153 /// Pops `arity` args, dispatches to a host effect handler, pushes result.
154 /// `node_id_idx` points to a `Const::NodeId` for trace keying.
155 EffectCall { kind_idx: u32, op_idx: u32, arity: u16, node_id_idx: u32 },
156 Return,
157 Panic(u32), // pushes constant message and aborts
158
159 // arithmetic — typed (per spec §8.2). `NumAdd`/etc. dispatch on operand
160 // type at runtime; emitted when the compiler doesn't have type info.
161 // The post-M5 plan is to lower NumAdd → IntAdd|FloatAdd in a typed pass.
162 IntAdd, IntSub, IntMul, IntDiv, IntMod, IntNeg,
163 IntEq, IntLt, IntLe,
164 FloatAdd, FloatSub, FloatMul, FloatDiv, FloatNeg,
165 FloatEq, FloatLt, FloatLe,
166 NumAdd, NumSub, NumMul, NumDiv, NumMod, NumNeg,
167 NumEq, NumLt, NumLe,
168 BoolAnd, BoolOr, BoolNot,
169
170 // strings
171 StrConcat, StrLen, StrEq,
172 BytesLen, BytesEq,
173
174 // superinstructions (#461)
175 //
176 // Fused opcodes emitted by the compiler's peephole pass to skip
177 // dispatch on common multi-op patterns. The pass leaves the
178 // original primitive ops in place at the trailing slots — the
179 // dispatch loop overrides its default `pc += 1` to step past
180 // them. Keeping `code.len()` invariant means existing
181 // Jump/JumpIf offsets remain valid without a renumbering pass.
182 /// Fused `LoadLocal(local_idx) + PushConst(imm_const_idx) +
183 /// IntAdd`. `imm_const_idx` must point to a `Const::Int`. The
184 /// dispatch arm reads the local, adds the constant, pushes the
185 /// result, and advances pc by 3 (past this op and the two
186 /// inert PushConst + IntAdd slots that follow). For
187 /// `body_hash` stability (#222) the canonical encoding decomposes
188 /// this op back to a standalone `LoadLocal(local_idx)` at hash
189 /// time; the unchanged PushConst / IntAdd at the next two
190 /// slots hash normally, so the total bytes match pre-fusion.
191 LoadLocalAddIntConst { local_idx: u16, imm_const_idx: u32 },
192 /// Fused `LoadLocal(src) + PushConst(imm_const_idx) + IntAdd +
193 /// StoreLocal(dest)` (#461 superinstruction slice 2). Bypasses
194 /// the value stack entirely: reads `locals[src]`, adds the Int
195 /// constant, writes `locals[dest]`. Advances pc by 4. Stack
196 /// delta: 0.
197 ///
198 /// The peephole pass that emits this op runs *after* slice 1,
199 /// looking for `[LoadLocalAddIntConst, ., ., StoreLocal]` where
200 /// the middle two slots are slice-1 tombstones (the original
201 /// PushConst + IntAdd). The verifier and the body-hash decoder
202 /// both treat the 3 following slots as tombstones owned by
203 /// this op.
204 LoadLocalAddIntConstStoreLocal { src: u16, imm_const_idx: u32, dest: u16 },
205 /// Fused `LoadLocal(lhs_idx) + LoadLocal(rhs_idx) + IntAdd`
206 /// (#461 superinstruction slice 3). The binary-op-on-two-locals
207 /// idiom — fires on any `a + b` where both operands are
208 /// statically-typed `Int` locals (e.g. `acc + n` in tail-recursive
209 /// accumulator loops). Reads `locals[lhs_idx]` and `locals[rhs_idx]`,
210 /// pushes the sum, advances pc by 3. Stack delta: +1.
211 ///
212 /// `body_hash` stability (#222): canonical encoding decomposes
213 /// back to a standalone `LoadLocal(lhs_idx)`. The unchanged
214 /// `LoadLocal(rhs_idx)` + `IntAdd` tombstones at pc+1 and pc+2
215 /// hash normally, so the total bytes match the pre-fusion form.
216 /// Verifier walks the tombstones as if live: their deltas
217 /// (+1 LoadLocal, -1 IntAdd) cancel, matching the unfused depth
218 /// at pc+3.
219 LoadLocalAddLocal { lhs_idx: u16, rhs_idx: u16 },
220 /// Fused `LoadLocal(lhs_idx) + LoadLocal(rhs_idx) + IntSub`
221 /// (#461 superinstruction slice 4). Sibling of `LoadLocalAddLocal`
222 /// for the typed `Int` subtraction binop — fires on any `a - b`
223 /// where both operands are `Int` locals. Reads `locals[lhs_idx]`
224 /// and `locals[rhs_idx]`, pushes `lhs - rhs`, advances pc by 3.
225 /// Stack delta: +1. Tombstone + body-hash story matches
226 /// `LoadLocalAddLocal` exactly.
227 LoadLocalSubLocal { lhs_idx: u16, rhs_idx: u16 },
228 /// Fused `LoadLocal(lhs_idx) + LoadLocal(rhs_idx) + IntMul`
229 /// (#461 superinstruction slice 4). Sibling of `LoadLocalAddLocal`
230 /// for the typed `Int` multiplication binop. Same shape: reads
231 /// the two Int locals, pushes `lhs * rhs`, advances pc by 3.
232 /// Stack delta: +1. Tombstone + body-hash story matches
233 /// `LoadLocalAddLocal` exactly.
234 LoadLocalMulLocal { lhs_idx: u16, rhs_idx: u16 },
235 /// Fused `LoadLocal(local_idx) + PushConst(imm_const_idx) +
236 /// IntEq + JumpIfNot(offset)` (#461 superinstruction slice 5,
237 /// pattern-match arm-test idiom). Fires on every numeric
238 /// pattern arm test — `match n { 0 => acc; _ => recurse }` and
239 /// the cascade of integer-literal arms in any pattern match —
240 /// after `compile_pattern_test` lowers the historical NumEq to
241 /// IntEq for Int-literal patterns. Reads the local, compares
242 /// against the Int constant; if equal, advances pc by 4 (past
243 /// the 3 tombstones, into the arm body); if not equal, jumps
244 /// to `pc + 4 + jump_offset` (the JumpIfNot's original target —
245 /// the next arm test or the panic-no-match block). Stack
246 /// delta: 0 (original sequence had +1, +1, -1, -1).
247 ///
248 /// Jump-aware peephole — slice 5 is the first fusion that
249 /// absorbs a control-flow op. The verifier walks the fused op
250 /// with both fall-through and branch successors, skipping past
251 /// the trailing three tombstones (mirroring slice 2's 4-slot
252 /// pattern). `body_hash` decodes back to a standalone
253 /// `LoadLocal(local_idx)`; the trailing primitive ops stay in
254 /// the code stream as tombstones and hash normally — so
255 /// closure identity (#222) stays invariant.
256 LoadLocalEqIntConstJumpIfNot { local_idx: u16, imm_const_idx: u32, jump_offset: i32 },
257 /// Fused `LoadLocal(src) + StoreLocal(dst) +
258 /// LoadLocalEqIntConstJumpIfNot { local_idx: dst, ... }` (#461
259 /// superinstruction slice 6). Absorbs the match-scrutinee dance
260 /// — the `LoadLocal + StoreLocal` the compiler emits to bind the
261 /// match expression to a fresh local before each arm's pattern
262 /// test reads it back. Reads `locals[src]`, mirrors the original
263 /// `StoreLocal(dst)` by writing the same value into `locals[dst]`
264 /// (so the SECOND and later arm tests in the same match still
265 /// see the scrutinee at the expected slot), then compares against
266 /// the constant. Equal → advance pc by 6 (skip past the 5
267 /// tombstones — original StoreLocal + slice-5 fused op + slice-5's
268 /// 3 trailing tombstones). Not equal → jump to
269 /// `pc + 6 + jump_offset` (the original JumpIfNot's target;
270 /// `jump_offset` is copied unchanged from the slice-5 op).
271 /// Stack delta: 0.
272 ///
273 /// `body_hash` decodes back to a standalone `LoadLocal(src)`;
274 /// the trailing 5 ops (StoreLocal, the slice-5 fused op
275 /// decoded as LoadLocal(dst), PushConst, IntEq, JumpIfNot) stay
276 /// in the code stream as tombstones and hash normally.
277 LoadLocalStoreEqIntConstJumpIfNot {
278 src: u16, dst: u16, imm_const_idx: u32, jump_offset: i32,
279 },
280 /// Fused `LoadLocal(local_idx) + GetField{name_idx, site_idx} +
281 /// IntAdd` (#461 superinstruction slice 7). Fires on the
282 /// `acc + r.field` accumulator-with-field-read idiom — the
283 /// shape any `expr + record.field` lowers to when the LHS is
284 /// already on the stack and the RHS is a same-frame record
285 /// field. After #464 step 2 dropped the IndexMap allocation
286 /// from hot-path records, this fusion is the next dispatch-
287 /// overhead bottleneck on the `response_build` profile.
288 ///
289 /// Dispatch: pops the prior stack top (an Int), reads
290 /// `locals[local_idx]`, performs the polymorphic-IC GetField
291 /// lookup keyed by `(fn_id, site_idx)` against `name_idx`,
292 /// adds the field value to the popped Int, pushes the result,
293 /// advances pc by 3.
294 ///
295 /// Stack delta: +1 (matches a bare `LoadLocal`). The trailing
296 /// `GetField` (delta 0) and `IntAdd` (delta -1) stay in the
297 /// code stream as inert tombstones; the verifier walks them as
298 /// live and their cancelling deltas leave depth at pc+3
299 /// matching the unfused form.
300 ///
301 /// `body_hash` stability (#222): canonical encoding decomposes
302 /// to a standalone `LoadLocal(local_idx)`; the unchanged
303 /// `GetField` and `IntAdd` at pc+1 and pc+2 hash normally, so
304 /// the total bytes match pre-fusion. The trailing `GetField`'s
305 /// own body-hash decoding (which strips `site_idx`) means the
306 /// hash is unchanged across recompiles where IC-site numbering
307 /// differs.
308 ///
309 /// Safety: the trailing two slots must not be jump targets
310 /// (standard tombstone rule). The first slot may be a target —
311 /// the fused op there is live.
312 LoadLocalGetFieldAdd { local_idx: u16, name_idx: u32, site_idx: u32 },
313 /// Slice 8 of #461: `IntSub` / `IntMul` siblings of slice 7's
314 /// `LoadLocalGetFieldAdd`. Fuse `LoadLocal + GetField + IntSub`
315 /// and `LoadLocal + GetField + IntMul` respectively — the
316 /// `acc - r.field` and `acc * r.field` idioms. Same tombstone,
317 /// jump-safety, body-hash (decode to `LoadLocal(local_idx)`),
318 /// and verifier (+1 delta) story as slice 7.
319 ///
320 /// `IntSub` is not commutative: the unfused sequence leaves the
321 /// field value on top, so `IntSub`'s deeper-minus-top semantics
322 /// give `acc - field`. The fused dispatch preserves that order.
323 LoadLocalGetFieldSub { local_idx: u16, name_idx: u32, site_idx: u32 },
324 LoadLocalGetFieldMul { local_idx: u16, name_idx: u32, site_idx: u32 },
325 /// Slice 9 of #461: fuse `LoadLocal(local_idx) + GetField{name_idx,
326 /// site_idx}` — the bare `record.field` read, the single most
327 /// common field-access shape. Unlike slices 7/8 there's no
328 /// arithmetic terminator; this is a 2-op window.
329 ///
330 /// The win is allocation, not just dispatch: the unfused pair
331 /// `LoadLocal` clones the entire record onto the value stack
332 /// (a `Box<IndexMap>` for a heap record), `GetField` pops it,
333 /// reads one field, and drops the rest. The fused op reads the
334 /// field out of the local by reference (`read_local_record_field`)
335 /// and clones only that one value. On the `response_build`
336 /// profile the whole-record clone+drop of the returned `Response`
337 /// (`r.total`) was the dominant malloc source.
338 ///
339 /// Stack delta: +1 (LoadLocal +1, GetField 0). The trailing
340 /// `GetField` stays as a single inert tombstone (delta 0); the
341 /// verifier walks it, leaving depth at pc+2 matching the unfused
342 /// `[LoadLocal, GetField]` pair. `body_hash` decodes to a
343 /// standalone `LoadLocal(local_idx)`; the trailing `GetField`
344 /// hashes normally.
345 ///
346 /// Safety: the trailing slot (the original `GetField`) must not
347 /// be a jump target. The first slot may be.
348 LoadLocalGetField { local_idx: u16, name_idx: u32, site_idx: u32 },
349}